/**
 * \file dnn/src/cuda/conv_bias/cudnn_conv.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 */

#include "src/common/conv_bias.h"
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/cudnn_wrapper.h"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;
using namespace conv_bias;

bool ConvBiasForwardImpl::AlgoCUDNNConv::is_available(const SizeArgs& args) const {
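    // Strided (non-contiguous) src/dst are only accepted for NCHW and NHWC;
    // those layouts can carry explicit strides in cuDNN tensor descriptors,
    // while the remaining formats are assumed to require contiguous tensors.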
    if (args.filter_meta.format != Param::Format::NCHW &&
        args.filter_meta.format != Param::Format::NHWC) {
        if (!args.src_layout->is_contiguous() || !args.dst_layout->is_contiguous()) {
            return false;
        }
    }

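    // This algorithm does not handle 4-bit quantized outputs.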
    if (args.dst_layout->dtype.enumv() == DTypeEnum::QuantizedS4 ||
        args.dst_layout->dtype.enumv() == DTypeEnum::Quantized4Asymm) {
        return false;
    }

    // FIXME: cudnn cannot handle the case when the initial value of dst tensor
    // contains nan and beta is zero, because the result of 0.f * nan is still
    // nan
    if (args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS8 &&
        args.dst_layout->dtype.enumv() == DTypeEnum::Float32 &&
        args.opr->param().format == param::ConvBias::Format::NCHW) {
        return false;
    }

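    // If bias dtype differs from dst dtype, the convolution itself must
    // produce the dtype deduced from src/filter; re-deduce it on a copy of
    // the dst layout before asking cuDNN about this configuration.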
    auto dst_layout = *args.dst_layout;
    if (dst_layout.dtype.enumv() != args.bias_layout->dtype.enumv()) {
        dst_layout.dtype = DType();
        args.opr->check_or_deduce_dtype_fwd(
                args.src_layout->dtype, args.filter_layout->dtype, dst_layout.dtype);
    }
    SizeArgs conv_args = args;
    conv_args.dst_layout = &dst_layout;

    if (!is_cudnn_supported(conv_args))
        return false;
    CUDNNForwardDescs D;
    conv_args.init_conv_desc(D);

    size_t workspace_size;
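    // Probe cuDNN: the algorithm is considered available exactly when a
    // workspace size can be queried for these descriptors.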
    auto status = cudnnGetConvolutionForwardWorkspaceSize(
            conv_args.handle->cudnn_handle(), D.src_desc.desc, D.filter_desc.desc,
            D.conv_desc.conv_desc, D.dst_desc.desc, m_cudnn_enum, &workspace_size);
    return status == CUDNN_STATUS_SUCCESS;
}

WorkspaceBundle ConvBiasForwardImpl::AlgoCUDNNConv::get_workspace_bundle(
        void* ptr, const SizeArgs& args) const {
    auto dst_layout = *args.dst_layout;
    SmallVector<size_t> sizes;
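    // Bundle layout: slot 0 holds the cuDNN workspace; slot 1 (optional) a
    // dst buffer and slot 2 (optional) a z buffer in the deduced convolution
    // dtype, used when the respective dtype differs from the bias dtype.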
    if (dst_layout.dtype.enumv() != args.bias_layout->dtype.enumv()) {
        dst_layout.dtype = DType();
        args.opr->check_or_deduce_dtype_fwd(
                args.src_layout->dtype, args.filter_layout->dtype, dst_layout.dtype);
        sizes.push_back(dst_layout.span().dist_byte());
    }

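    // Same deduction for the optional z input: reserve a conversion buffer
    // when its dtype differs from the bias dtype.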
    if (args.z_layout->ndim > 0 &&
        args.z_layout->dtype.enumv() != args.bias_layout->dtype.enumv()) {
        auto z_layout = *args.z_layout;
        z_layout.dtype = DType();
        args.opr->check_or_deduce_dtype_fwd(
                args.src_layout->dtype, args.filter_layout->dtype, z_layout.dtype);
        sizes.push_back(z_layout.span().dist_byte());
    }

    SizeArgs conv_args = args;
    conv_args.dst_layout = &dst_layout;

    CUDNNForwardDescs D;
    conv_args.init_conv_desc(D);

    size_t conv_workspace_size;
    auto status = cudnnGetConvolutionForwardWorkspaceSize(
            conv_args.handle->cudnn_handle(), D.src_desc.desc, D.filter_desc.desc,
            D.conv_desc.conv_desc, D.dst_desc.desc, m_cudnn_enum, &conv_workspace_size);
    megdnn_assert(
            status == CUDNN_STATUS_SUCCESS,
            "conv fwd get workspace failed: %s; info: %s", cudnnGetErrorString(status),
            args.to_string().c_str());
    sizes.insert(sizes.begin(), conv_workspace_size);
    return {ptr, std::move(sizes)};
}

size_t ConvBiasForwardImpl::AlgoCUDNNConv::get_workspace_in_bytes(
        const SizeArgs& args) const {
    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
}

void ConvBiasForwardImpl::AlgoCUDNNConv::exec(const ExecArgs& args) const {
    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
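    // The convolution normally writes straight into dst; when dst dtype
    // differs from bias dtype, redirect it into the temporary buffer in
    // bundle slot 1, carrying the deduced convolution output dtype.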
    TensorND conv_dst_tensor = *args.dst_tensor;
    if (args.dst_layout->dtype.enumv() != args.bias_layout->dtype.enumv()) {
        conv_dst_tensor = TensorND{bundle.get(1), args.dst_tensor->layout};
        conv_dst_tensor.layout.dtype = DType();
        args.opr->check_or_deduce_dtype_fwd(
                args.src_layout->dtype, args.filter_layout->dtype,
                conv_dst_tensor.layout.dtype);
    }

    ExecArgs conv_args = args;
    conv_args.dst_tensor = &conv_dst_tensor;
    conv_args.dst_layout = &conv_dst_tensor.layout;

    {
        CUDNNForwardDescs D;
        conv_args.init_conv_desc(D);
        auto conv_workspace = bundle.get_workspace(0);
        float alpha = 1.0f, beta = 0.0f;
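        // cuDNN computes dst = alpha * conv(src, filter) + beta * dst; with
        // beta = 0 the previous dst contents are discarded (but see the FIXME
        // in is_available(): 0.f * nan is still nan).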
        auto status = cudnnConvolutionForward(
                conv_args.handle->cudnn_handle(), &alpha, D.src_desc.desc,
                conv_args.src_tensor->raw_ptr(), D.filter_desc.desc,
                conv_args.filter_tensor->raw_ptr(), D.conv_desc.conv_desc, m_cudnn_enum,
                conv_workspace.raw_ptr, conv_workspace.size, &beta, D.dst_desc.desc,
                conv_args.dst_tensor->raw_ptr());
        megdnn_assert(
                status == CUDNN_STATUS_SUCCESS, "conv fwd failed: %s; info: %s",
                cudnnGetErrorString(status), conv_args.to_string().c_str());
    }

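    // Optional z input: convert it to the convolution output dtype if
    // necessary, then accumulate it into the conv result with an elementwise
    // ADD.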
    if (args.z_layout->ndim > 0) {
        auto z_tensor = *args.z_tensor;
        if (args.z_layout->dtype.enumv() != args.bias_layout->dtype.enumv()) {
            z_tensor = TensorND{bundle.get(2), args.z_tensor->layout};
            z_tensor.layout.dtype = DType();
            args.opr->check_or_deduce_dtype_fwd(
                    args.src_layout->dtype, args.filter_layout->dtype,
                    z_tensor.layout.dtype);
            auto typecvt = args.handle->create_operator<TypeCvt>();
            typecvt->exec(*args.z_tensor, z_tensor);
        }
        auto add = args.handle->create_operator<ElemwiseForward>();
        add->param().mode = Elemwise::Param::Mode::ADD;
        add->exec({conv_dst_tensor, z_tensor}, conv_dst_tensor);
    }

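    // Finally apply bias and the activation, writing the result into the
    // user-provided dst tensor (this also converts back to the requested dst
    // dtype when a temporary conv output was used).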
    handle_bias_and_nonlinear(
            args.handle, args.nonlinear_mode, &conv_dst_tensor, args.dst_tensor,
            args.bias_tensor);
}

// vim: syntax=cpp.doxygen