/**
 * \file dnn/src/cuda/convolution/backward_data/group_conv.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "./algo.h"

using namespace megdnn;
using namespace cuda;
using namespace convolution;

void ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::modify_size_args(
20 21
        ConvolutionBackwardDataImpl::AlgoBase::SizeArgs& args,
        TensorLayout& diff_pg, TensorLayout& grad_pg) {
22 23 24 25 26 27 28 29 30 31 32
    diff_pg = *args.diff_layout;
    grad_pg = *args.grad_layout;
    auto nr_grp = args.filter_meta.group;
    args.filter_meta.group = 1;
    diff_pg.shape[1] /= nr_grp;
    grad_pg.shape[1] /= nr_grp;
    args.diff_layout = &diff_pg;
    args.grad_layout = &grad_pg;
}

ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::AlgoGroupConvGeneral(
        AlgoBase* impl)
        : m_impl{impl} {
    // Name identifies the wrapped single-group algorithm,
    // e.g. "group_conv:<impl name>".
    m_name = "group_conv:";
    m_name.append(impl->name());
}

bool ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::is_available(
40 41 42 43 44
        const SizeArgs& args) const {
    if ((args.diff_layout->dtype == args.filter_layout->dtype &&
         args.diff_layout->dtype == dtype::BFloat16()) ||
        (args.diff_layout->dtype == args.filter_layout->dtype &&
         args.diff_layout->dtype == dtype::QuantizedS8())) {
45 46
        return false;
    }
47 48
    if (args.filter_meta.group <= 1)
        return false;
49 50 51 52 53 54
    auto sub_args = args;
    TensorLayout diff_pg, grad_pg;
    modify_size_args(sub_args, diff_pg, grad_pg);
    return m_impl->is_available(sub_args);
}

55 56 57
size_t
ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::get_workspace_in_bytes(
        const SizeArgs& args) const {
58 59 60 61 62 63 64
    auto sub_args = args;
    TensorLayout diff_pg, grad_pg;
    modify_size_args(sub_args, diff_pg, grad_pg);
    return m_impl->get_workspace_in_bytes(sub_args);
}

void ConvolutionBackwardDataImpl::AlgoGroupConvGeneral::exec(
65
        const ExecArgs& args) const {
66 67
    auto sub_args = args;
    TensorND tflt{*args.filter_tensor}, tdiff{*args.diff_tensor},
68
            tgrad{*args.grad_tensor};
69 70 71 72 73 74
    modify_size_args(sub_args, tdiff.layout, tgrad.layout);
    sub_args.filter_tensor = &tflt;
    sub_args.diff_tensor = &tdiff;
    sub_args.grad_tensor = &tgrad;
    auto grp = args.filter_meta.group;

75 76 77 78 79 80 81 82
    auto&& fm = args.filter_meta;
    auto strd_flt = (fm.icpg * fm.ocpg * fm.spatial[0] * fm.spatial[1] *
                     tflt.layout.dtype.size()),
         strd_diff =
                 (tdiff.layout.stride[1] * fm.ocpg * tdiff.layout.dtype.size()),
         strd_grad =
                 (tgrad.layout.stride[1] * fm.icpg * tgrad.layout.dtype.size());
    for (uint32_t g = 0; g < grp; ++g) {
83 84 85 86 87 88 89 90
        m_impl->exec(sub_args);
        incr_voidp(tflt.raw_ptr, strd_flt);
        incr_voidp(tdiff.raw_ptr, strd_diff);
        incr_voidp(tgrad.raw_ptr, strd_grad);
    }
}

// vim: syntax=cpp.doxygen