Commit d04cd67f authored by Megvii Engine Team, committed by huangxinda

refactor(mgb): make conv-backward-filter handle noncontiguous tensors

GitOrigin-RevId: 44c586f912a235f5610f7ef40a815181726aa153
Parent 44376f70
@@ -1189,7 +1189,12 @@ ConvolutionBackwardFilter::check_exec(const TensorLayout& src,
diff.dtype.category() == DTypeCategory::FLOAT &&
grad.dtype.category() == DTypeCategory::FLOAT,
"only float type is supported for conv backward filter");
auto ret = check_layout_fwd(src, grad, diff);
auto src_fwd = src;
auto diff_fwd = diff;
src_fwd.init_contiguous_stride();
diff_fwd.init_contiguous_stride();
auto ret = check_layout_fwd(src_fwd, grad, diff_fwd);
auto required_workspace_in_bytes = get_workspace_in_bytes(src, diff, grad);
megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
return ret;
......
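The check_exec change above copies the incoming layouts and resets them to dense strides before running the forward-layout validation, so shape/dtype checks no longer reject strided inputs. For reference, a row-major contiguous stride is just the running product of the trailing dimensions; a minimal sketch, not the actual MegDNN TensorLayout::init_contiguous_stride:

```cpp
#include <cstddef>
#include <vector>

// Dense (row-major) strides for a shape: the innermost dimension gets stride 1,
// each outer dimension gets the product of all dimensions to its right.
std::vector<ptrdiff_t> contiguous_stride(const std::vector<size_t>& shape) {
    std::vector<ptrdiff_t> stride(shape.size());
    ptrdiff_t acc = 1;
    for (size_t i = shape.size(); i-- > 0;) {
        stride[i] = acc;
        acc *= static_cast<ptrdiff_t>(shape[i]);
    }
    return stride;
}
// contiguous_stride({2, 16, 7, 7}) == {784, 49, 7, 1}
```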
@@ -20,6 +20,10 @@ using namespace convolution;
bool ConvolutionBackwardDataImpl::AlgoChanwise::is_available(
const SizeArgs& args) const {
if (!args.grad_layout->is_contiguous() ||
!args.diff_layout->is_contiguous()) {
return false;
}
if ((args.diff_layout->dtype == args.filter_layout->dtype &&
args.diff_layout->dtype == dtype::BFloat16()) ||
(args.diff_layout->dtype == args.filter_layout->dtype &&
......
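This guard (repeated below for AlgoChanwiseSmall and the backward-filter AlgoChanwise) makes the chanwise algorithms report themselves unavailable for strided grad/diff tensors, presumably because their kernels assume dense indexing, so the dispatcher falls back to a path that can handle such layouts. A layout counts as contiguous when every stride matches the dense stride shown above; an illustrative check, not MegDNN's TensorLayout::is_contiguous:

```cpp
#include <cstddef>
#include <vector>

// A layout is contiguous when each stride equals the product of the
// dimensions to its right (illustrative only).
bool is_contiguous(const std::vector<size_t>& shape,
                   const std::vector<ptrdiff_t>& stride) {
    ptrdiff_t expect = 1;
    for (size_t i = shape.size(); i-- > 0;) {
        if (stride[i] != expect) return false;
        expect *= static_cast<ptrdiff_t>(shape[i]);
    }
    return true;
}
// is_contiguous({2, 16, 7, 7}, {1568, 49, 7, 1}) == false (dense batch stride is 784)
```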
@@ -30,6 +30,10 @@ inline bool is_available_small(const chanwise::Param& param) {
bool ConvolutionBackwardDataImpl::AlgoChanwiseSmall::is_available(
const SizeArgs& args) const {
if (!args.grad_layout->is_contiguous() ||
!args.diff_layout->is_contiguous()) {
return false;
}
if ((args.diff_layout->dtype == args.filter_layout->dtype &&
args.diff_layout->dtype == dtype::BFloat16()) ||
(args.diff_layout->dtype == args.filter_layout->dtype &&
......
@@ -71,7 +71,7 @@ ConvolutionBackwardFilterImpl::AlgoBase::SizeArgs::SizeArgs(
ConvolutionBackwardFilterImpl *o,
const TensorLayout &src, const TensorLayout &diff,
const TensorLayout &grad):
SizeArgs(o, src, diff, grad, o->check_layout_fwd(src, grad, diff))
SizeArgs(o, src, diff, grad, o->make_canonized_filter_meta(src.ndim, grad))
{
}
......
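Building SizeArgs from make_canonized_filter_meta instead of check_layout_fwd avoids re-running the full forward-layout validation (which the strided src/diff would not pass in its raw form); only the filter description is needed here, and it is derived from the grad layout alone. Roughly, that description is the group/channel/kernel breakdown of a dense filter layout; a hypothetical, much-reduced sketch (the real CanonizedFilterMeta carries more fields and handles grouped/other formats):

```cpp
#include <cstddef>
#include <vector>

// Hypothetical stand-in for a canonized filter meta: an ungrouped NCHW filter
// {OC, IC, FH, FW} mapped to per-group channel counts and kernel spatial size.
// Field and function names here are illustrative only.
struct FilterMetaSketch {
    size_t group, ocpg, icpg, fh, fw;
};

FilterMetaSketch make_filter_meta_sketch(const std::vector<size_t>& filter_shape) {
    // filter_shape = {OC, IC, FH, FW} for an ungrouped NCHW convolution
    return {1, filter_shape[0], filter_shape[1], filter_shape[2], filter_shape[3]};
}
```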
@@ -19,6 +19,10 @@ using namespace convolution;
bool ConvolutionBackwardFilterImpl::AlgoChanwise::is_available(
const SizeArgs &args) const {
if (!args.grad_layout->is_contiguous() ||
!args.diff_layout->is_contiguous()) {
return false;
}
if (args.src_layout->dtype == args.src_layout->dtype &&
args.diff_layout->dtype == dtype::BFloat16()) {
return false;
......
@@ -21,6 +21,13 @@ using namespace convolution;
bool ConvolutionBackwardFilterImpl::AlgoCUDNN::is_available(
const SizeArgs &args) const {
if (args.grad_filter_meta.format != Param::Format::NCHW &&
args.grad_filter_meta.format != Param::Format::NHWC) {
if (!args.grad_layout->is_contiguous() ||
!args.diff_layout->is_contiguous()) {
return false;
}
}
auto& cudnn = args.handle->cudnn();
CUDNNBwdFilterDescs D;
......
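For NCHW and NHWC the cuDNN path keeps accepting strided tensors, presumably because cuDNN tensor descriptors carry explicit per-dimension strides; only the other formats bail out when the layouts are noncontiguous. A hedged sketch of how such a strided descriptor can be described to cuDNN, matching the test layouts added below:

```cpp
#include <cudnn.h>

// Describe the strided {2, 16, 7, 7} / {1568, 49, 7, 1} layout used in the
// tests below; the dense batch stride would be 784, so this view skips
// every other sample of a larger buffer.
void set_strided_nchw_desc(cudnnTensorDescriptor_t desc) {
    const int dims[4]    = {2, 16, 7, 7};
    const int strides[4] = {1568, 49, 7, 1};
    cudnnSetTensorNdDescriptor(desc, CUDNN_DATA_FLOAT, 4, dims, strides);
}
```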
@@ -64,8 +64,9 @@ ConvolutionBackwardFilterImpl::AlgoMatmul::get_subopr_list(
const TensorLayoutArray& layouts, const OperatorBase* opr) const {
const ConvolutionBackwardFilterImpl* conv_backward_filter_opr =
static_cast<const ConvolutionBackwardFilterImpl*>(opr);
CanonizedFilterMeta fm = conv_backward_filter_opr->check_layout_fwd(
layouts[0], layouts[2], layouts[1]);
CanonizedFilterMeta fm =
conv_backward_filter_opr->make_canonized_filter_meta(
layouts[0].ndim, layouts[2]);
auto&& config = sub_opr_config(fm, layouts[0], layouts[1], layouts[2],
conv_backward_filter_opr);
......
@@ -519,6 +519,38 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_MATMUL) {
.set_param(arg.param)
.exec(TensorLayoutArray{src, dst, filter});
}
//! noncontiguous case
{
NormalRNG default_rng;
param::Convolution param;
param.pad_h = param.pad_w = 1;
checker.set_rng(0, &default_rng)
.set_rng(1, &default_rng)
.set_param(param)
.execl(TensorLayoutArray{
{{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
{{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
{{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()}});
}
}
TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_CUDNN) {
if (cuda::is_compute_capability_required(7, 0))
return;
using namespace convolution;
Checker<ConvolutionBackwardFilter> checker(handle_cuda());
checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardFilter>(
"CUDNN_CONVOLUTION"));
//! noncontiguous case
{
param::Convolution param;
param.pad_h = param.pad_w = 1;
checker.set_param(param).execl(TensorLayoutArray{
{{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
{{2, 16, 7, 7}, {1568, 49, 7, 1}, dtype::Float32()},
{{16, 16, 3, 3}, {144, 9, 3, 1}, dtype::Float32()}
});
}
}
TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
......
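Both new test cases use the same deliberately noncontiguous layouts: for shape {2, 16, 7, 7} the dense strides would be {784, 49, 7, 1}, so a batch stride of 1568 describes a view that touches every other sample of, e.g., a dense {4, 16, 7, 7} buffer, while the {16, 16, 3, 3} filter stays dense. A tiny sanity check of that stride arithmetic, illustrative only:

```cpp
#include <cassert>

int main() {
    const long dense_batch_stride = 16 * 7 * 7;  // 784 for a dense {2, 16, 7, 7} tensor
    const long test_batch_stride = 1568;         // value used in the checker layouts above
    assert(test_batch_stride == 2 * dense_batch_stride);  // every-other-sample view
    return 0;
}
```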