提交 4a92346b 编写于 作者: M Megvii Engine Team

refactor(mgb): refactor group conv3d

GitOrigin-RevId: 15360a3a417b34da99967525de174def66945060
上级 6ce212d2
......@@ -24,21 +24,7 @@ Convolution3DBackwardDataImpl::AlgoPack::AlgoPack() {
for (auto &&i: cudnn) {
all_algos.push_back(&i);
}
all_algos.reserve(all_algos.size() * 2);
// add gconv algos by AlgoGroupConvGeneral
auto all_algos_data = all_algos.data();
for (size_t i = 1; i < all_algos.size(); ++ i) {
gconv.push_back({all_algos[i]});
}
for (size_t i = 1; i < all_algos.size(); ++ i) {
algo2gconv[all_algos[i]] = &gconv[i - 1];
}
for (auto &&i: gconv) {
all_algos.push_back(&i);
}
megdnn_assert(all_algos_data == all_algos.data());
all_algos.push_back(&group);
for (auto&& algo : all_algos) {
m_all_algos_map.emplace(algo->info().desc, algo);
......@@ -61,27 +47,26 @@ Convolution3DBackwardDataImpl::AlgoPack::cudnn_from_enum(
Convolution3DBackwardDataImpl::AlgoPack Convolution3DBackwardDataImpl::sm_algo_pack;
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
Convolution3DBackwardDataImpl *o,
const TensorLayout &filter, const TensorLayout &diff,
const TensorLayout &grad):
SizeArgs(o, o->make_canonized_filter_meta(grad.ndim, filter), diff, grad)
{
}
const Convolution3DBackwardDataImpl* o, const TensorLayout& filter,
const TensorLayout& diff, const TensorLayout& grad)
: SizeArgs(o, filter, o->make_canonized_filter_meta(grad.ndim, filter),
diff, grad) {}
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs::SizeArgs(
Convolution3DBackwardDataImpl *o,
const CanonizedFilterMeta &filter, const TensorLayout &diff,
const Convolution3DBackwardDataImpl *o, const TensorLayout& filter,
const CanonizedFilterMeta &filter_meta, const TensorLayout &diff,
const TensorLayout &grad):
handle{concrete_handle(o->handle())},
filter_meta{filter},
filter_meta{filter_meta},
diff_layout{&diff},
grad_layout{&grad},
filter_layout{&filter},
opr{o}
{
}
Convolution3DBackwardDataImpl::AlgoBase::ExecArgs::ExecArgs(
Convolution3DBackwardDataImpl *opr,
const Convolution3DBackwardDataImpl *opr,
_megdnn_tensor_in filter,
_megdnn_tensor_in diff,
_megdnn_tensor_out grad,
......
......@@ -42,31 +42,33 @@ public:
struct SizeArgs {
HandleImpl* handle;
CanonizedFilterMeta filter_meta;
const TensorLayout *diff_layout, *grad_layout;
Convolution3DBackwardDataImpl* opr;
const TensorLayout *diff_layout, *grad_layout, *filter_layout;
const Convolution3DBackwardDataImpl* opr;
std::string to_string() const;
void init_desc(convolution3d::CUDNNBwdDataDescs& desc) const {
desc.set(filter_meta, *diff_layout, *grad_layout, opr->param());
}
SizeArgs(Convolution3DBackwardDataImpl* opr, const TensorLayout& filter,
const TensorLayout& diff, const TensorLayout& grad);
SizeArgs(Convolution3DBackwardDataImpl* opr,
const CanonizedFilterMeta& filter, const TensorLayout& diff,
SizeArgs(const Convolution3DBackwardDataImpl* opr,
const TensorLayout& filter, const TensorLayout& diff,
const TensorLayout& grad);
SizeArgs(const Convolution3DBackwardDataImpl* opr,
const TensorLayout& filter,
const CanonizedFilterMeta& filter_meta,
const TensorLayout& diff, const TensorLayout& grad);
convolution3d::ForwardSizeArgs as_fwd_args() const {
return {handle, grad_layout, filter_meta, diff_layout,
opr->param().data_type};
return {handle, grad_layout, filter_layout,
filter_meta, diff_layout, opr->param().data_type};
}
};
struct ExecArgs : public SizeArgs {
const TensorND *filter_tensor, *diff_tensor, *grad_tensor;
Workspace workspace;
ExecArgs(Convolution3DBackwardDataImpl* opr, _megdnn_tensor_in filter,
_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace);
ExecArgs(const Convolution3DBackwardDataImpl* opr,
_megdnn_tensor_in filter, _megdnn_tensor_in diff,
_megdnn_tensor_out grad, _megdnn_workspace workspace);
};
virtual bool is_available(const SizeArgs& args) const = 0;
virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
......@@ -154,29 +156,25 @@ public:
//! implement group conv by another algo
class Convolution3DBackwardDataImpl::AlgoGroupConvGeneral final
: public AlgoBase {
AlgoBase* m_impl;
std::string m_name;
public:
AlgoGroupConvGeneral(AlgoBase* impl);
bool is_available(const SizeArgs& args) const override;
size_t get_workspace_in_bytes(const SizeArgs& args) const override;
void exec(const ExecArgs& args) const override;
std::vector<SearchItem> get_subopr_list(
const TensorLayoutArray& layouts,
const OperatorBase* opr) const override;
const char* name() const override { return m_name.c_str(); }
const char* name() const override {
return "CUDA:GROUP_CONV3D_BACKWARD_DATA";
}
static void modify_size_args(SizeArgs& args, TensorLayout& diff_pg,
TensorLayout& grad_pg);
AlgoAttribute attribute() const override {
auto ret = static_cast<AlgoAttribute>(0);
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
return ret;
return AlgoAttribute::REPRODUCIBLE;
}
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
private:
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
};
class Convolution3DBackwardDataImpl::AlgoPack : NonCopyableObj {
......@@ -190,8 +188,7 @@ public:
std::vector<AlgoCUDNN> cudnn;
AlgoChanwise chanwise;
std::vector<AlgoGroupConvGeneral> gconv;
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
AlgoGroupConvGeneral group;
std::vector<AlgoBase*>
//! all algorithms
......
......@@ -15,68 +15,121 @@ using namespace megdnn;
using namespace cuda;
using namespace convolution3d;
void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::modify_size_args(
Convolution3DBackwardDataImpl::AlgoBase::SizeArgs &args,
TensorLayout &diff_pg, TensorLayout &grad_pg) {
diff_pg = *args.diff_layout;
grad_pg = *args.grad_layout;
namespace {
std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param>
sub_opr_config(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) {
SmallVector<size_t> flt_shape(0);
std::vector<ptrdiff_t> flt_stride(0);
size_t idx = 0;
// check if the first dim is group
if (args.filter_layout->ndim > args.grad_layout->ndim)
++idx;
for (; idx < args.filter_layout->ndim; ++idx) {
flt_shape.push_back(args.filter_layout->shape[idx]);
flt_stride.push_back(args.filter_layout->stride[idx]);
}
TensorLayout filter_pg(flt_shape, flt_stride,
args.filter_layout->dtype,
args.filter_layout->format);
TensorLayout diff_pg = *args.diff_layout;
TensorLayout grad_pg = *args.grad_layout;
auto nr_grp = args.filter_meta.group;
args.filter_meta.group = 1;
diff_pg.shape[1] /= nr_grp;
grad_pg.shape[1] /= nr_grp;
args.diff_layout = &diff_pg;
args.grad_layout = &grad_pg;
size_t c_pos = 1;
diff_pg.shape[c_pos] /= nr_grp;
grad_pg.shape[c_pos] /= nr_grp;
megdnn::param::Convolution3D param = args.opr->param();
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE;
std::pair<TensorLayoutArray, Convolution3DBackwardDataImpl::Param> ret;
ret.first = {filter_pg, diff_pg, grad_pg};
ret.second = param;
return ret;
}
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
AlgoBase *impl):
m_impl{impl}
{
m_name = "group_conv3d:";
m_name += impl->name();
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardData>>
prepare_sub_opr(const Convolution3DBackwardDataImpl::AlgoBase::SizeArgs& args) {
auto conv3d_backdata_opr =
args.handle->create_operator<Convolution3DBackwardData>();
set_execution_policy<Convolution3DBackwardData, Convolution3DBackwardData*>(
args.opr, conv3d_backdata_opr.get());
auto&& config = sub_opr_config(args);
conv3d_backdata_opr->param() = config.second;
return {config.first, std::move(conv3d_backdata_opr)};
}
} // namespace
std::vector<Algorithm::SearchItem>
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_subopr_list(
const TensorLayoutArray& layouts, const OperatorBase* opr) const {
AlgoBase::SizeArgs args{
static_cast<const Convolution3DBackwardDataImpl*>(opr), layouts[0],
layouts[1], layouts[2]};
auto&& config = sub_opr_config(args);
std::string param_str;
Algorithm::serialize_write_pod(config.second, param_str);
return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_DATA, param_str,
config.first}};
}
bool Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::is_available(
const SizeArgs &args) const {
if (args.filter_meta.group <= 1)
return false;
auto sub_args = args;
TensorLayout diff_pg, grad_pg;
modify_size_args(sub_args, diff_pg, grad_pg);
return m_impl->is_available(sub_args);
if (args.filter_meta.format != Param::Format::NCDHW) {
return false;
}
auto config = prepare_sub_opr(args);
return get_algorithm(
static_cast<Convolution3DBackwardDataImpl*>(config.second.get()),
config.first[0], config.first[1], config.first[2]);
}
WorkspaceBundle
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_bundle(
void* ptr, const SizeArgs& args) const {
auto config = prepare_sub_opr(args);
size_t sizes = config.second->get_workspace_in_bytes(
config.first[0], config.first[1], config.first[2]);
return {ptr, {sizes}};
}
size_t Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::
get_workspace_in_bytes(const SizeArgs &args) const {
auto sub_args = args;
TensorLayout diff_pg, grad_pg;
modify_size_args(sub_args, diff_pg, grad_pg);
return m_impl->get_workspace_in_bytes(sub_args);
size_t
Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
const SizeArgs& args) const {
return get_workspace_bundle(nullptr, args).total_size_in_bytes();
}
void Convolution3DBackwardDataImpl::AlgoGroupConvGeneral::exec(
const ExecArgs &args) const {
auto sub_args = args;
TensorND tflt{*args.filter_tensor}, tdiff{*args.diff_tensor},
tgrad{*args.grad_tensor};
modify_size_args(sub_args, tdiff.layout, tgrad.layout);
sub_args.filter_tensor = &tflt;
sub_args.diff_tensor = &tdiff;
sub_args.grad_tensor = &tgrad;
auto grp = args.filter_meta.group;
auto &&fm = args.filter_meta;
auto strd_flt = (fm.icpg * fm.ocpg *
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * tflt.layout.dtype.size()),
strd_diff = (
tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()),
strd_grad = (
tgrad.layout.stride[1] * fm.icpg * tgrad.layout.dtype.size());
for (uint32_t g = 0; g < grp; ++ g) {
m_impl->exec(sub_args);
incr_voidp(tflt.raw_ptr, strd_flt);
incr_voidp(tdiff.raw_ptr, strd_diff);
incr_voidp(tgrad.raw_ptr, strd_grad);
const ExecArgs& args) const {
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
{
auto config = prepare_sub_opr(args);
TensorND tfilter{args.filter_tensor->raw_ptr, config.first[0]};
TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]};
TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]};
size_t c_pos = 1;
auto grp = args.filter_meta.group;
auto&& fm = args.filter_meta;
auto strd_flt = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
fm.spatial[2] * tfilter.layout.dtype.size()),
strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg *
tdiff.layout.dtype.size()),
strd_grad = (tgrad.layout.stride[c_pos] * fm.icpg *
tgrad.layout.dtype.size());
for (uint32_t g = 0; g < grp; ++g) {
config.second->exec(tfilter, tdiff, tgrad, bundle.get_workspace(0));
incr_voidp(tfilter.raw_ptr, strd_flt);
incr_voidp(tdiff.raw_ptr, strd_diff);
incr_voidp(tgrad.raw_ptr, strd_grad);
}
}
}
......
......@@ -26,21 +26,7 @@ Convolution3DBackwardFilterImpl::AlgoPack::AlgoPack() {
}
all_algos.push_back(&inplace_matmul);
all_algos.reserve(all_algos.size() * 2);
// add gconv algos by AlgoGroupConvGeneral
auto all_algos_data = all_algos.data();
for (size_t i = 1; i < all_algos.size(); ++ i) {
gconv.push_back({all_algos[i]});
}
for (size_t i = 1; i < all_algos.size(); ++ i) {
algo2gconv[all_algos[i]] = &gconv[i - 1];
}
for (auto &&i: gconv) {
all_algos.push_back(&i);
}
megdnn_assert(all_algos_data == all_algos.data());
non_cudnn_algos.push_back(all_algos.rbegin()[0]); //group inplace_matmul
all_algos.push_back(&group);
for (auto&& algo : all_algos) {
m_all_algos_map.emplace(algo->info().desc, algo);
......@@ -64,27 +50,26 @@ Convolution3DBackwardFilterImpl::AlgoPack
Convolution3DBackwardFilterImpl::sm_algo_pack;
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
Convolution3DBackwardFilterImpl *o,
const Convolution3DBackwardFilterImpl *o,
const TensorLayout &src, const TensorLayout &diff,
const TensorLayout &grad):
SizeArgs(o, src, diff, o->make_canonized_filter_meta(src.ndim, grad))
SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad))
{
}
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
Convolution3DBackwardFilterImpl *o,
const TensorLayout &src, const TensorLayout &diff,
const CanonizedFilterMeta &grad):
handle{concrete_handle(o->handle())},
src_layout{&src},
diff_layout{&diff},
grad_filter_meta{grad},
opr{o}
{
}
const Convolution3DBackwardFilterImpl* o, const TensorLayout& src,
const TensorLayout& diff, const TensorLayout& grad,
const CanonizedFilterMeta& grad_meta)
: handle{concrete_handle(o->handle())},
src_layout{&src},
diff_layout{&diff},
grad_layout{&grad},
grad_filter_meta{grad_meta},
opr{o} {}
Convolution3DBackwardFilterImpl::AlgoBase::ExecArgs::ExecArgs(
Convolution3DBackwardFilterImpl *opr,
const Convolution3DBackwardFilterImpl *opr,
_megdnn_tensor_in src,
_megdnn_tensor_in diff,
_megdnn_tensor_out grad,
......
......@@ -36,31 +36,34 @@ public:
struct SizeArgs {
HandleImpl* handle;
const TensorLayout *src_layout, *diff_layout;
const TensorLayout *src_layout, *diff_layout, *grad_layout;
CanonizedFilterMeta grad_filter_meta;
Convolution3DBackwardFilterImpl* opr;
const Convolution3DBackwardFilterImpl* opr;
std::string to_string() const;
void init_desc(convolution3d::CUDNNBwdFilterDescs& desc) const {
desc.set(*src_layout, *diff_layout, grad_filter_meta, opr->param());
}
SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src,
const TensorLayout& diff, const TensorLayout& grad);
SizeArgs(Convolution3DBackwardFilterImpl* opr, const TensorLayout& src,
const TensorLayout& diff, const CanonizedFilterMeta& grad);
SizeArgs(const Convolution3DBackwardFilterImpl* opr,
const TensorLayout& src, const TensorLayout& diff,
const TensorLayout& grad);
SizeArgs(const Convolution3DBackwardFilterImpl* opr,
const TensorLayout& src, const TensorLayout& diff,
const TensorLayout& grad,
const CanonizedFilterMeta& grad_meta);
convolution3d::ForwardSizeArgs as_fwd_args() const {
return {handle, src_layout, grad_filter_meta, diff_layout,
opr->param().data_type};
return {handle, src_layout, grad_layout,
grad_filter_meta, diff_layout, opr->param().data_type};
}
};
struct ExecArgs : public SizeArgs {
const TensorND *src_tensor, *diff_tensor, *grad_tensor;
Workspace workspace;
ExecArgs(Convolution3DBackwardFilterImpl* opr, _megdnn_tensor_in src,
_megdnn_tensor_in diff, _megdnn_tensor_out grad,
_megdnn_workspace workspace);
ExecArgs(const Convolution3DBackwardFilterImpl* opr,
_megdnn_tensor_in src, _megdnn_tensor_in diff,
_megdnn_tensor_out grad, _megdnn_workspace workspace);
};
virtual bool is_available(const SizeArgs& args) const = 0;
virtual size_t get_workspace_in_bytes(const SizeArgs& args) const = 0;
......@@ -162,30 +165,25 @@ public:
//! implement group conv by another algo
class Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral final
: public AlgoBase {
AlgoBase* m_impl;
std::string m_name;
public:
AlgoGroupConvGeneral(AlgoBase* impl);
bool is_available(const SizeArgs& args) const override;
size_t get_workspace_in_bytes(const SizeArgs& args) const override;
void exec(const ExecArgs& args) const override;
std::vector<SearchItem> get_subopr_list(
const TensorLayoutArray& layouts,
const OperatorBase* opr) const override;
const char* name() const override { return m_name.c_str(); }
const char* name() const override {
return "CUDA:GROUP_CONV3D_BACKWARD_FILTER";
}
AlgoAttribute attribute() const override {
auto ret = static_cast<AlgoAttribute>(0);
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
return ret;
return AlgoAttribute::REPRODUCIBLE;
}
static void modify_size_args(SizeArgs& args, TensorLayout& src_pg,
TensorLayout& diff_pg);
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
private:
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
};
class Convolution3DBackwardFilterImpl::AlgoPack : NonCopyableObj {
......@@ -200,8 +198,7 @@ public:
std::vector<AlgoCUDNN> cudnn;
AlgoInplaceMatmul inplace_matmul;
AlgoChanwise chanwise;
std::vector<AlgoGroupConvGeneral> gconv;
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
AlgoGroupConvGeneral group;
std::vector<AlgoBase*>
//! all algorithms
......
......@@ -15,69 +15,123 @@ using namespace megdnn;
using namespace cuda;
using namespace convolution3d;
void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::modify_size_args(
Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs &args,
TensorLayout &src_pg, TensorLayout &diff_pg) {
src_pg = *args.src_layout;
diff_pg = *args.diff_layout;
namespace {
std::pair<TensorLayoutArray, Convolution3DBackwardFilterImpl::Param>
sub_opr_config(
const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) {
SmallVector<size_t> flt_shape(0);
std::vector<ptrdiff_t> flt_stride(0);
size_t idx = 0;
// check if the first dim is group
if (args.grad_layout->ndim > args.src_layout->ndim)
++idx;
for (; idx < args.grad_layout->ndim; ++idx) {
flt_shape.push_back(args.grad_layout->shape[idx]);
flt_stride.push_back(args.grad_layout->stride[idx]);
}
TensorLayout grad_pg(flt_shape, flt_stride, args.grad_layout->dtype,
args.grad_layout->format);
TensorLayout src_pg = *args.src_layout;
TensorLayout diff_pg = *args.diff_layout;
auto nr_grp = args.grad_filter_meta.group;
args.grad_filter_meta.group = 1;
src_pg.shape[1] /= nr_grp;
diff_pg.shape[1] /= nr_grp;
args.src_layout = &src_pg;
args.diff_layout = &diff_pg;
size_t c_pos = 1;
src_pg.shape[c_pos] /= nr_grp;
diff_pg.shape[c_pos] /= nr_grp;
megdnn::param::Convolution3D param = args.opr->param();
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE;
std::pair<TensorLayoutArray, Convolution3DBackwardFilterImpl::Param> ret;
ret.first = {src_pg, diff_pg, grad_pg};
ret.second = param;
return ret;
}
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
AlgoBase *impl):
m_impl{impl}
{
m_name = "group_conv3d:";
m_name += impl->name();
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DBackwardFilter>>
prepare_sub_opr(
const Convolution3DBackwardFilterImpl::AlgoBase::SizeArgs& args) {
auto conv3d_backfilter_opr =
args.handle->create_operator<Convolution3DBackwardFilter>();
set_execution_policy<Convolution3DBackwardFilter,
Convolution3DBackwardFilter*>(
args.opr, conv3d_backfilter_opr.get());
auto&& config = sub_opr_config(args);
conv3d_backfilter_opr->param() = config.second;
return {config.first, std::move(conv3d_backfilter_opr)};
}
} // namespace
std::vector<Algorithm::SearchItem>
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_subopr_list(
const TensorLayoutArray& layouts, const OperatorBase* opr) const {
AlgoBase::SizeArgs args{
static_cast<const Convolution3DBackwardFilterImpl*>(opr),
layouts[0], layouts[1], layouts[2]};
auto&& config = sub_opr_config(args);
std::string param_str;
Algorithm::serialize_write_pod(config.second, param_str);
return {{Algorithm::OprType::CONVOLUTION3D_BACKWARD_FILTER, param_str,
config.first}};
}
bool Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::is_available(
const SizeArgs &args) const {
const SizeArgs& args) const {
if (args.grad_filter_meta.group <= 1)
return false;
auto sub_args = args;
TensorLayout src_pg, diff_pg;
modify_size_args(sub_args, src_pg, diff_pg);
return m_impl->is_available(sub_args);
if (args.grad_filter_meta.format != Param::Format::NCDHW) {
return false;
}
auto config = prepare_sub_opr(args);
return get_algorithm(
static_cast<Convolution3DBackwardFilterImpl*>(config.second.get()),
config.first[0], config.first[1], config.first[2]);
}
WorkspaceBundle
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_bundle(
void* ptr, const SizeArgs& args) const {
auto config = prepare_sub_opr(args);
size_t sizes = config.second->get_workspace_in_bytes(
config.first[0], config.first[1], config.first[2]);
return {ptr, {sizes}};
}
size_t Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::
get_workspace_in_bytes(const SizeArgs &args) const {
auto sub_args = args;
TensorLayout src_pg, diff_pg;
modify_size_args(sub_args, src_pg, diff_pg);
return m_impl->get_workspace_in_bytes(sub_args);
size_t
Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
const SizeArgs& args) const {
return get_workspace_bundle(nullptr, args).total_size_in_bytes();
}
void Convolution3DBackwardFilterImpl::AlgoGroupConvGeneral::exec(
const ExecArgs &args) const {
auto sub_args = args;
TensorND tsrc{*args.src_tensor}, tdiff{*args.diff_tensor},
tgrad{*args.grad_tensor};
modify_size_args(sub_args, tsrc.layout, tdiff.layout);
sub_args.src_tensor = &tsrc;
sub_args.diff_tensor = &tdiff;
sub_args.grad_tensor = &tgrad;
auto &&fm = args.grad_filter_meta;
auto grp = fm.group;
auto strd_src = (
tsrc.layout.stride[1] * fm.icpg * tsrc.layout.dtype.size()),
strd_diff = (
tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()),
strd_grad = (fm.icpg * fm.ocpg *
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] * tgrad.layout.dtype.size());
for (uint32_t g = 0; g < grp; ++ g) {
m_impl->exec(sub_args);
incr_voidp(tsrc.raw_ptr, strd_src);
incr_voidp(tdiff.raw_ptr, strd_diff);
incr_voidp(tgrad.raw_ptr, strd_grad);
const ExecArgs& args) const {
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
{
auto config = prepare_sub_opr(args);
TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]};
TensorND tdiff{args.diff_tensor->raw_ptr, config.first[1]};
TensorND tgrad{args.grad_tensor->raw_ptr, config.first[2]};
size_t c_pos = 1;
auto grp = args.grad_filter_meta.group;
auto&& fm = args.grad_filter_meta;
auto strd_src = (tsrc.layout.stride[c_pos] * fm.icpg *
tsrc.layout.dtype.size()),
strd_diff = (tdiff.layout.stride[c_pos] * fm.ocpg *
tdiff.layout.dtype.size()),
strd_grad = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
fm.spatial[2] * tgrad.layout.dtype.size());
for (uint32_t g = 0; g < grp; ++g) {
config.second->exec(tsrc, tdiff, tgrad, bundle.get_workspace(0));
incr_voidp(tsrc.raw_ptr, strd_src);
incr_voidp(tdiff.raw_ptr, strd_diff);
incr_voidp(tgrad.raw_ptr, strd_grad);
}
}
}
......
......@@ -28,22 +28,7 @@ Convolution3DForwardImpl::AlgoPack::AlgoPack() {
}
all_algos.push_back(&inplace_matmul);
all_algos.push_back(&a1x1x1);
all_algos.reserve(all_algos.size() * 2);
// add gconv algos by AlgoGroupConvGeneral
auto all_algos_data = all_algos.data();
for (size_t i = 1; i < all_algos.size(); ++ i) {
gconv.push_back({all_algos[i]});
}
for (size_t i = 1; i < all_algos.size(); ++ i) {
algo2gconv[all_algos[i]] = &gconv[i - 1];
}
for (auto &&i: gconv) {
all_algos.push_back(&i);
}
megdnn_assert(all_algos_data == all_algos.data());
non_cudnn_algos.push_back(all_algos.rbegin()[1]); // group inplace_matmul
non_cudnn_algos.push_back(all_algos.rbegin()[0]); // group 1x1x1
all_algos.push_back(&group);
for (auto&& algo : all_algos) {
m_all_algos_map.emplace(algo->info().desc, algo);
......@@ -66,28 +51,25 @@ Convolution3DForwardImpl::AlgoPack::cudnn_from_enum(
Convolution3DForwardImpl::AlgoPack Convolution3DForwardImpl::sm_algo_pack;
Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs(
Convolution3DForwardImpl *o,
const TensorLayout &src, const TensorLayout &filter,
const TensorLayout &dst):
SizeArgs(o, src, o->make_canonized_filter_meta(src.ndim, filter), dst)
{
}
const Convolution3DForwardImpl* o, const TensorLayout& src,
const TensorLayout& filter, const TensorLayout& dst)
: SizeArgs(o, src, filter,
o->make_canonized_filter_meta(src.ndim, filter), dst) {}
Convolution3DForwardImpl::AlgoBase::SizeArgs::SizeArgs(
Convolution3DForwardImpl *o,
const TensorLayout &src, const CanonizedFilterMeta &filter,
const TensorLayout &dst):
ForwardSizeArgs{
concrete_handle(o->handle()),
&src, filter, &dst,
o->param().data_type
},
opr{o}
{
}
const Convolution3DForwardImpl* o, const TensorLayout& src,
const TensorLayout& filter, const CanonizedFilterMeta& filter_meta,
const TensorLayout& dst)
: ForwardSizeArgs{concrete_handle(o->handle()),
&src,
&filter,
filter_meta,
&dst,
o->param().data_type},
opr{o} {}
Convolution3DForwardImpl::AlgoBase::ExecArgs::ExecArgs(
Convolution3DForwardImpl *opr,
const Convolution3DForwardImpl *opr,
_megdnn_tensor_in src,
_megdnn_tensor_in filter,
_megdnn_tensor_out dst,
......
......@@ -48,22 +48,24 @@ public:
AlgoBase() : Algorithm() { m_handle_type = Handle::HandleType::CUDA; }
struct SizeArgs : public convolution3d::ForwardSizeArgs {
Convolution3DForwardImpl* opr;
const Convolution3DForwardImpl* opr;
std::string to_string() const;
void init_desc(convolution3d::CUDNNForwardDescs& desc) const {
desc.set(*src_layout, filter_meta, *dst_layout, opr->param());
}
SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src,
SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src,
const TensorLayout& filter, const TensorLayout& dst);
SizeArgs(Convolution3DForwardImpl* opr, const TensorLayout& src,
const CanonizedFilterMeta& filter, const TensorLayout& dst);
SizeArgs(const Convolution3DForwardImpl* opr, const TensorLayout& src,
const TensorLayout& filter,
const CanonizedFilterMeta& filter_meta,
const TensorLayout& dst);
};
struct ExecArgs : public SizeArgs {
const TensorND *src_tensor, *filter_tensor, *dst_tensor;
Workspace workspace;
ExecArgs(Convolution3DForwardImpl* opr, _megdnn_tensor_in src,
ExecArgs(const Convolution3DForwardImpl* opr, _megdnn_tensor_in src,
_megdnn_tensor_in filter, _megdnn_tensor_out dst,
_megdnn_workspace workspace);
};
......@@ -114,35 +116,22 @@ public:
//! implement group conv by another algo
class Convolution3DForwardImpl::AlgoGroupConvGeneral final : public AlgoBase {
AlgoBase* m_impl;
std::string m_name;
public:
AlgoGroupConvGeneral(AlgoBase* impl);
bool is_available(const SizeArgs& args) const override;
size_t get_workspace_in_bytes(const SizeArgs& args) const override;
void exec(const ExecArgs& args) const override;
std::vector<SearchItem> get_subopr_list(
const TensorLayoutArray& layouts,
const OperatorBase* opr) const override;
const char* name() const override { return m_name.c_str(); }
const char* name() const override { return "CUDA:GROUP_CONV3D_FORWARD"; }
AlgoAttribute attribute() const override {
auto ret = AlgoAttribute::DEFAULT;
if (m_impl->contain_attribute_all(AlgoAttribute::REPRODUCIBLE)) {
ret |= AlgoAttribute::REPRODUCIBLE;
}
#define cb(attr) \
if (m_impl->contain_attribute_all(attr)) { \
ret |= attr; \
}
MEGDNN_FOREACH_ALGO_ATTRIBUTE_INHERITABLE(cb)
#undef cb
return ret;
return AlgoAttribute::REPRODUCIBLE;
}
static void modify_size_args(SizeArgs& args, TensorLayout& src_pg,
TensorLayout& dst_pg);
MEGDNN_DECL_ALGO_TYPE(CUDA_GROUP_CONV_GENERAL)
private:
WorkspaceBundle get_workspace_bundle(void* ptr, const SizeArgs& args) const;
};
class Convolution3DForwardImpl::AlgoCUDNN final : public AlgoBase {
......@@ -226,8 +215,7 @@ public:
Algo1x1x1 a1x1x1;
AlgoInplaceMatmul inplace_matmul;
AlgoChanwise chanwise;
std::vector<AlgoGroupConvGeneral> gconv;
std::unordered_map<AlgoBase*, AlgoGroupConvGeneral*> algo2gconv;
AlgoGroupConvGeneral group;
std::vector<AlgoBase*>
//! all algorithms
......
......@@ -15,84 +15,136 @@ using namespace megdnn;
using namespace cuda;
using namespace convolution3d;
void Convolution3DForwardImpl::AlgoGroupConvGeneral::modify_size_args(
Convolution3DForwardImpl::AlgoBase::SizeArgs &args,
TensorLayout &src_pg, TensorLayout &dst_pg) {
src_pg = *args.src_layout;
dst_pg = *args.dst_layout;
namespace {
std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> sub_opr_config(
const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) {
TensorLayout src_pg = *args.src_layout;
SmallVector<size_t> flt_shape(0);
std::vector<ptrdiff_t> flt_stride(0);
size_t idx = 0;
// check if the first dim is group
if (args.filter_layout->ndim > args.src_layout->ndim)
++idx;
for (; idx < args.filter_layout->ndim; ++idx) {
flt_shape.push_back(args.filter_layout->shape[idx]);
flt_stride.push_back(args.filter_layout->stride[idx]);
}
TensorLayout filter_pg(flt_shape, flt_stride,
args.filter_layout->dtype,
args.filter_layout->format);
TensorLayout dst_pg = *args.dst_layout;
auto nr_grp = args.filter_meta.group;
args.filter_meta.group = 1;
size_t c_pos;
if (args.filter_meta.format == Param::Format::NCDHW) {
if (args.filter_meta.format == param::Convolution3D::Format::NCDHW) {
c_pos = 1;
} else {
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC,
megdnn_assert(
args.filter_meta.format == param::Convolution3D::Format::NDHWC,
"invalid conv format");
c_pos = 4;
}
src_pg.shape[c_pos] /= nr_grp;
dst_pg.shape[c_pos] /= nr_grp;
args.src_layout = &src_pg;
args.dst_layout = &dst_pg;
megdnn::param::Convolution3D param = args.opr->param();
param.sparse = megdnn::param::Convolution3D::Sparse::DENSE;
std::pair<TensorLayoutArray, Convolution3DForwardImpl::Param> ret;
ret.first = {src_pg, filter_pg, dst_pg};
ret.second = param;
return ret;
}
std::pair<TensorLayoutArray, std::unique_ptr<Convolution3DForward>>
prepare_sub_opr(const Convolution3DForwardImpl::AlgoBase::SizeArgs& args) {
auto conv3d_opr = args.handle->create_operator<Convolution3D>();
set_execution_policy<Convolution3DForward, Convolution3DForward*>(
args.opr, conv3d_opr.get());
auto&& config = sub_opr_config(args);
conv3d_opr->param() = config.second;
return {config.first, std::move(conv3d_opr)};
}
} // namespace
std::vector<Algorithm::SearchItem>
Convolution3DForwardImpl::AlgoGroupConvGeneral::get_subopr_list(
const TensorLayoutArray& layouts, const OperatorBase* opr) const {
AlgoBase::SizeArgs args{static_cast<const Convolution3DForwardImpl*>(opr),
layouts[0], layouts[1], layouts[2]};
auto&& config = sub_opr_config(args);
Convolution3DForwardImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
AlgoBase *impl):
m_impl{impl} {
m_name = "group_conv3d:";
m_name += impl->name();
std::string param_str;
Algorithm::serialize_write_pod(config.second, param_str);
return {{Algorithm::OprType::CONVOLUTION3D_FORWARD, param_str,
config.first}};
}
bool Convolution3DForwardImpl::AlgoGroupConvGeneral::is_available(
const SizeArgs &args) const {
if (args.filter_meta.group <= 1)
return false;
auto sub_args = args;
TensorLayout src_pg, dst_pg;
modify_size_args(sub_args, src_pg, dst_pg);
return m_impl->is_available(sub_args);
if (args.filter_meta.format != Param::Format::NCDHW &&
args.filter_meta.format != Param::Format::NDHWC) {
return false;
}
auto config = prepare_sub_opr(args);
return get_algorithm(
static_cast<Convolution3DForwardImpl*>(config.second.get()),
config.first[0], config.first[1], config.first[2]);
}
WorkspaceBundle
Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_bundle(
void* ptr, const SizeArgs& args) const {
auto config = prepare_sub_opr(args);
size_t sizes = config.second->get_workspace_in_bytes(
config.first[0], config.first[1], config.first[2]);
return {ptr, {sizes}};
}
size_t Convolution3DForwardImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
const SizeArgs &args) const {
auto sub_args = args;
TensorLayout src_pg, dst_pg;
modify_size_args(sub_args, src_pg, dst_pg);
return m_impl->get_workspace_in_bytes(sub_args);
const SizeArgs& args) const {
return get_workspace_bundle(nullptr, args).total_size_in_bytes();
}
void Convolution3DForwardImpl::AlgoGroupConvGeneral::exec(
const ExecArgs &args) const {
auto sub_args = args;
TensorND tsrc{*args.src_tensor}, tdst{*args.dst_tensor},
tflt{*args.filter_tensor};
modify_size_args(sub_args, tsrc.layout, tdst.layout);
sub_args.src_tensor = &tsrc;
sub_args.dst_tensor = &tdst;
sub_args.filter_tensor = &tflt;
const ExecArgs& args) const {
auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
{
auto config = prepare_sub_opr(args);
TensorND tsrc{args.src_tensor->raw_ptr, config.first[0]};
TensorND tfilter{args.filter_tensor->raw_ptr, config.first[1]};
TensorND tdst{args.dst_tensor->raw_ptr, config.first[2]};
size_t c_pos;
if (args.filter_meta.format == Param::Format::NCDHW) {
c_pos = 1;
} else {
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC,
"invalid conv format");
c_pos = 4;
}
size_t c_pos;
if (args.filter_meta.format == Param::Format::NCDHW) {
c_pos = 1;
} else {
megdnn_assert(args.filter_meta.format == Param::Format::NDHWC,
"invalid conv format");
c_pos = 4;
}
auto grp = args.filter_meta.group;
auto&& fm = args.filter_meta;
auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg *
tsrc.layout.dtype.size(),
strd_dst = tdst.layout.stride[c_pos] * fm.ocpg *
tdst.layout.dtype.size(),
strd_flt = fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
fm.spatial[2] * tfilter.layout.dtype.size();
auto grp = args.filter_meta.group;
auto &&fm = args.filter_meta;
auto strd_src = tsrc.layout.stride[c_pos] * fm.icpg * tsrc.layout.dtype.size(),
strd_dst = tdst.layout.stride[c_pos] * fm.ocpg * tdst.layout.dtype.size(),
strd_flt = fm.icpg * fm.ocpg *
fm.spatial[0] * fm.spatial[1] * fm.spatial[2] *
tflt.layout.dtype.size();
for (uint32_t g = 0; g < grp; ++ g) {
m_impl->exec(sub_args);
incr_voidp(tsrc.raw_ptr, strd_src);
incr_voidp(tdst.raw_ptr, strd_dst);
incr_voidp(tflt.raw_ptr, strd_flt);
for (uint32_t g = 0; g < grp; ++g) {
config.second->exec(tsrc, tfilter, tdst, bundle.get_workspace(0));
incr_voidp(tsrc.raw_ptr, strd_src);
incr_voidp(tdst.raw_ptr, strd_dst);
incr_voidp(tfilter.raw_ptr, strd_flt);
}
}
}
......
......@@ -26,6 +26,7 @@ namespace convolution3d {
struct ForwardSizeArgs {
HandleImpl *handle;
const TensorLayout *src_layout;
const TensorLayout *filter_layout;
CanonizedFilterMeta filter_meta;
const TensorLayout *dst_layout;
param::Convolution3D::DataType data_type;
......
......@@ -35,16 +35,6 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
const TensorLayout& dst, size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
auto fm = check_layout_fwd(src, filter, dst);
return get_algorithm_heuristic(src, fm, dst, workspace_limit_in_bytes,
positive_attr, negative_attr);
}
Convolution3DForwardImpl::Algorithm*
Convolution3DForwardImpl::get_algorithm_heuristic(
const TensorLayout& src, const CanonizedFilterMeta& filter,
const TensorLayout& dst, size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
AlgoBase::SizeArgs args(this, src, filter, dst);
#if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5)
......@@ -98,18 +88,14 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
if (auto algo = get_cudnn_algo())
return algo;
}
if (args.filter_meta.group > 1) {
auto orig_args = args;
TensorLayout a, b;
AlgoGroupConvGeneral::modify_size_args(args, a, b);
if (prefer_1x1x1()) {
return sm_algo_pack.algo2gconv.at(&sm_algo_pack.a1x1x1);
}
if (is_cudnn_supported(args)) {
if (auto algo = get_cudnn_algo())
return sm_algo_pack.algo2gconv.at(algo);
if (auto algo =
megdnn::get_algo_match_attribute<Convolution3DForwardImpl>(
&sm_algo_pack.group, positive_attr,
negative_attr)) {
return algo;
}
args = orig_args;
}
return megdnn::get_algo_match_attribute<Convolution3DForwardImpl>(
......@@ -129,7 +115,7 @@ size_t Convolution3DForwardImpl::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& filter,
const TensorLayout& dst) {
AlgoBase::SizeArgs args(this, src, filter, dst);
return get_algorithm(this, src, args.filter_meta, dst)
return get_algorithm(this, src, filter, dst)
->get_workspace_in_bytes(args);
}
......@@ -138,7 +124,7 @@ void Convolution3DForwardImpl::exec(_megdnn_tensor_in src,
_megdnn_tensor_out dst,
_megdnn_workspace workspace) {
AlgoBase::ExecArgs args(this, src, filter, dst, workspace);
auto algo = get_algorithm(this, src.layout, args.filter_meta, dst.layout);
auto algo = get_algorithm(this, src.layout, filter.layout, dst.layout);
algo->check_workspace(args, workspace).exec(args);
}
......@@ -151,7 +137,7 @@ void Convolution3DBackwardDataImpl::exec(_megdnn_tensor_in filter,
_megdnn_tensor_out grad,
_megdnn_workspace workspace) {
AlgoBase::ExecArgs args(this, filter, diff, grad, workspace);
auto algo = get_algorithm(this, args.filter_meta, diff.layout, grad.layout);
auto algo = get_algorithm(this, filter.layout, diff.layout, grad.layout);
algo->check_workspace(args, workspace).exec(args);
}
......@@ -169,17 +155,6 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
const TensorLayout& grad, size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
auto fm = check_layout_fwd(grad, filter, diff);
return get_algorithm_heuristic(fm, diff, grad, workspace_limit_in_bytes,
positive_attr, negative_attr);
}
Convolution3DBackwardDataImpl::Algorithm*
Convolution3DBackwardDataImpl::get_algorithm_heuristic(
const CanonizedFilterMeta& filter, const TensorLayout& diff,
const TensorLayout& grad, size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
AlgoBase::SizeArgs args(this, filter, diff, grad);
if (args.filter_meta.group > 1 &&
......@@ -215,14 +190,11 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
}
if (args.filter_meta.group > 1) {
auto orig_args = args;
TensorLayout a, b;
AlgoGroupConvGeneral::modify_size_args(args, a, b);
if (is_cudnn_supported(args.as_fwd_args())) {
if (auto algo = get_cudnn_algo())
return sm_algo_pack.algo2gconv.at(algo);
if (auto algo = megdnn::get_algo_match_attribute<
Convolution3DBackwardDataImpl>(
&sm_algo_pack.group, positive_attr, negative_attr)) {
return algo;
}
args = orig_args;
}
return megdnn::get_algo_match_attribute<Convolution3DBackwardDataImpl>(
......@@ -234,7 +206,7 @@ size_t Convolution3DBackwardDataImpl::get_workspace_in_bytes(
const TensorLayout& filter, const TensorLayout& diff,
const TensorLayout& grad) {
AlgoBase::SizeArgs args(this, filter, diff, grad);
return get_algorithm(this, args.filter_meta, diff, grad)
return get_algorithm(this, filter, diff, grad)
->get_workspace_in_bytes(args);
}
......@@ -248,7 +220,7 @@ void Convolution3DBackwardFilterImpl::exec(_megdnn_tensor_in src,
_megdnn_workspace workspace) {
AlgoBase::ExecArgs args(this, src, diff, grad, workspace);
auto algo =
get_algorithm(this, src.layout, diff.layout, args.grad_filter_meta);
get_algorithm(this, src.layout, diff.layout, grad.layout);
algo->check_workspace(args, workspace).exec(args);
}
......@@ -266,17 +238,6 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
const TensorLayout& grad, size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
auto fm = check_layout_fwd(src, grad, diff);
return get_algorithm_heuristic(src, diff, fm, workspace_limit_in_bytes,
positive_attr, negative_attr);
}
Convolution3DBackwardFilterImpl::Algorithm*
Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
const TensorLayout& src, const TensorLayout& diff,
const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
AlgoBase::SizeArgs args(this, src, diff, grad);
if (args.grad_filter_meta.group > 1 &&
......@@ -310,15 +271,13 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
if (auto algo = get_cudnn_algo())
return algo;
}
if (args.grad_filter_meta.group > 1) {
auto orig_args = args;
TensorLayout a, b;
AlgoGroupConvGeneral::modify_size_args(args, a, b);
if (is_cudnn_supported(args.as_fwd_args())) {
if (auto algo = get_cudnn_algo())
return sm_algo_pack.algo2gconv.at(algo);
if (auto algo = megdnn::get_algo_match_attribute<
Convolution3DBackwardFilterImpl>(
&sm_algo_pack.group, positive_attr, negative_attr)) {
return algo;
}
args = orig_args;
}
return megdnn::get_algo_match_attribute<Convolution3DBackwardFilterImpl>(
......@@ -330,7 +289,7 @@ size_t Convolution3DBackwardFilterImpl::get_workspace_in_bytes(
const TensorLayout& src, const TensorLayout& diff,
const TensorLayout& grad) {
AlgoBase::SizeArgs args(this, src, diff, grad);
return get_algorithm(this, src, diff, args.grad_filter_meta)
return get_algorithm(this, src, diff, grad)
->get_workspace_in_bytes(args);
}
......
......@@ -21,17 +21,6 @@ public:
using Convolution3DForward::Convolution3DForward;
void exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
_megdnn_tensor_out dst, _megdnn_workspace workspace) override;
AlgorithmInfo get_algorithm_info_heuristic(const TensorLayout& src,
const CanonizedFilterMeta& filter,
const TensorLayout& dst,
size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
return get_algorithm_heuristic(src, filter, dst,
workspace_limit_in_bytes, positive_attr,
negative_attr)
->info();
}
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& filter,
const TensorLayout& dst) override;
......@@ -57,13 +46,6 @@ protected:
const AlgoAttribute& negative_attr) override;
private:
Algorithm* get_algorithm_heuristic(const TensorLayout& src,
const CanonizedFilterMeta& filter,
const TensorLayout& dst,
size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr);
static AlgoPack sm_algo_pack;
};
......@@ -72,16 +54,6 @@ public:
using Convolution3DBackwardData::Convolution3DBackwardData;
void exec(_megdnn_tensor_in filter, _megdnn_tensor_in diff,
_megdnn_tensor_out grad, _megdnn_workspace workspace) override;
AlgorithmInfo get_algorithm_info_heuristic(
const CanonizedFilterMeta& filter, const TensorLayout& diff,
const TensorLayout& grad, size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
return get_algorithm_heuristic(filter, diff, grad,
workspace_limit_in_bytes, positive_attr,
negative_attr)
->info();
}
size_t get_workspace_in_bytes(const TensorLayout& filter,
const TensorLayout& diff,
const TensorLayout& grad) override;
......@@ -109,13 +81,6 @@ protected:
const AlgoAttribute& negative_attr) override;
private:
Algorithm* get_algorithm_heuristic(const CanonizedFilterMeta& filter,
const TensorLayout& diff,
const TensorLayout& grad,
size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr);
static AlgoPack sm_algo_pack;
};
......@@ -127,17 +92,6 @@ public:
size_t get_workspace_in_bytes(const TensorLayout& src,
const TensorLayout& diff,
const TensorLayout& grad) override;
AlgorithmInfo get_algorithm_info_heuristic(
const TensorLayout& src, const TensorLayout& diff,
const CanonizedFilterMeta& grad, size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr) {
return get_algorithm_heuristic(src, diff, grad,
workspace_limit_in_bytes, positive_attr,
negative_attr)
->info();
}
const char* get_algorithm_set_name() const override;
class AlgoBase;
......@@ -162,13 +116,6 @@ protected:
const AlgoAttribute& negative_attr) override;
private:
Algorithm* get_algorithm_heuristic(const TensorLayout& src,
const TensorLayout& diff,
const CanonizedFilterMeta& grad,
size_t workspace_limit_in_bytes,
const AlgoAttribute& positive_attr,
const AlgoAttribute& negative_attr);
static AlgoPack sm_algo_pack;
};
} // namespace cuda
......
......@@ -66,12 +66,10 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD_1x1x1) {
auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
size_t FD, size_t FH, size_t FW, size_t OC, size_t group) {
Checker<Convolution3D> checker(handle_cuda());
#if CUDNN_MAJOR <= 6
bool require_algo = true;
checker.set_before_exec_callback(
AlgoChecker<Convolution3DForward>{
"group_conv3d:1x1x1", &require_algo});
#endif
checker.set_before_exec_callback(AlgoChecker<Convolution3DForward>(
ExecutionPolicyAlgoName{"CUDA:GROUP_CONV3D_FORWARD",
{{"1x1x1",
{}}}}));
Convolution3D::Param param;
param.sparse = Convolution3D::Param::Sparse::GROUP;
auto ICg = IC / group;
......@@ -125,6 +123,45 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA) {
run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4);
}
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_DATA_CUDNN) {
    // Exercise the group conv3d backward-data path, forcing the grouped
    // algorithm with the cuDNN sub-operator underneath.
    auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
                   size_t FD, size_t FH, size_t FW, size_t OC, size_t OD,
                   size_t OH, size_t OW, size_t PD, size_t PH, size_t PW,
                   size_t SD, size_t SH, size_t SW, size_t group) {
        Checker<Convolution3DBackwardData> checker(handle_cuda());
        checker.set_before_exec_callback(
                AlgoChecker<Convolution3DBackwardData>(ExecutionPolicyAlgoName{
                        "CUDA:GROUP_CONV3D_BACKWARD_DATA", {{"CUDNN", {}}}}));
        Convolution3DBackwardData::Param param;
        param.sparse = Convolution3D::Param::Sparse::GROUP;
        param.stride_d = SD;
        param.stride_h = SH;
        param.stride_w = SW;
        param.pad_d = PD;
        param.pad_h = PH;
        param.pad_w = PW;
        const auto ic_per_grp = IC / group;
        const auto oc_per_grp = OC / group;
        // filter: {group, OCg, ICg, FD, FH, FW}, diff: NCDHW, grad: NCDHW
        checker.set_param(param).exec({{group, oc_per_grp, ic_per_grp, FD, FH, FW},
                                       {N, OC, OD, OH, OW},
                                       {N, IC, ID, IH, IW}});
    };
    // bug case in prev ver
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 3, 0, 0, 1, 1, 1, 1, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 0, 0, 1, 1, 1, 2, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2, 1, 0, 1, 0, 1, 2, 1, 2);
    run(1, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 0, 0, 2, 1, 1, 2);
    // normal case
    run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2);
    // padded case
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4);
    // strided case
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8);
    // bigger case
    run(2, 32, 64, 64, 64, 3, 3, 3, 32, 62, 62, 62, 0, 0, 0, 1, 1, 1, 4);
}
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) {
auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
size_t FD, size_t FH, size_t FW, size_t OC, size_t OD,
......@@ -153,6 +190,39 @@ TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER) {
run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8);
}
TEST_F(CUDA, GROUP_CONVOLUTION3D_BACKWARD_FILTER_CUDNN) {
    // Exercise the group conv3d backward-filter path, forcing the grouped
    // algorithm with the cuDNN sub-operator underneath.
    auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
                   size_t FD, size_t FH, size_t FW, size_t OC, size_t OD,
                   size_t OH, size_t OW, size_t PD, size_t PH, size_t PW,
                   size_t SD, size_t SH, size_t SW, size_t group) {
        Checker<Convolution3DBackwardFilter> checker(handle_cuda());
        checker.set_before_exec_callback(
                AlgoChecker<Convolution3DBackwardFilter>(
                        ExecutionPolicyAlgoName{
                                "CUDA:GROUP_CONV3D_BACKWARD_FILTER",
                                {{"CUDNN", {}}}}));
        Convolution3DBackwardFilter::Param param;
        param.sparse = Convolution3D::Param::Sparse::GROUP;
        param.stride_d = SD;
        param.stride_h = SH;
        param.stride_w = SW;
        param.pad_d = PD;
        param.pad_h = PH;
        param.pad_w = PW;
        const auto ic_per_grp = IC / group;
        const auto oc_per_grp = OC / group;
        // src: NCDHW, diff: NCDHW, grad (filter): {group, OCg, ICg, FD, FH, FW}
        checker.set_param(param).exec(
                {{N, IC, ID, IH, IW},
                 {N, OC, OD, OH, OW},
                 {group, oc_per_grp, ic_per_grp, FD, FH, FW}});
    };
    // normal case
    run(2, 64, 7, 7, 7, 3, 3, 3, 32, 5, 5, 5, 0, 0, 0, 1, 1, 1, 2);
    // padded case
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 7, 7, 7, 1, 1, 1, 1, 1, 1, 4);
    // strided case
    run(2, 32, 7, 7, 7, 3, 3, 3, 64, 3, 3, 3, 0, 0, 0, 2, 2, 2, 8);
}
} // namespace test
} // namespace megdnn
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册