#include "src/cuda/conv_bias/helper.h"

#include "src/cuda/utils.h"

namespace megdnn {
namespace cuda {

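//! RAII owner of the cuDNN activation/convolution descriptor pair used by
//! the fused conv-bias-activation path: both descriptors are created in the
//! constructor and destroyed in the destructor.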
ConvBiasDesc::ConvBiasDesc() {
    cudnn_check(cudnnCreateActivationDescriptor(&act_desc));
    cudnn_check(cudnnCreateConvolutionDescriptor(&conv_desc));
#if CUDNN_VERSION >= 7000
    cudnn_check(cudnnSetConvolutionMathType(conv_desc, CUDNN_TENSOR_OP_MATH));
#endif
}

ConvBiasDesc::~ConvBiasDesc() {
    cudnn_check(cudnnDestroyConvolutionDescriptor(conv_desc));
    cudnn_check(cudnnDestroyActivationDescriptor(act_desc));
}

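//! Set up both descriptors for the fused conv-bias-activation call.
//! cuDNN >= 7.1 is required because the fused path relies on
//! CUDNN_ACTIVATION_IDENTITY, which older versions do not accept.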
void ConvBiasDesc::set_conv_bias(
        DType data_type, const param::ConvBias& param, size_t nr_group) {
#if CUDNN_VERSION < 7100
    megdnn_throw("ConvBias(CUDNN_ACTIVATION_IDENTITY) requires cuDNN 7.1 or higher");
#else
    cudnnConvolutionMode_t mode;
    using Param = param::ConvBias;
    switch (param.mode) {
        case Param::Mode::CROSS_CORRELATION:
            mode = CUDNN_CROSS_CORRELATION;
            break;
        case Param::Mode::CONVOLUTION:
            mode = CUDNN_CONVOLUTION;
            break;
        default:
            megdnn_throw("conv mode must be conv or xcorr.");
    }
    cudnn_check(cudnnSetConvolutionGroupCount(conv_desc, nr_group));
    cudnnDataType_t compute_type;
    switch (data_type.category()) {
        case DTypeCategory::FLOAT:
            compute_type = CUDNN_DATA_FLOAT;
            break;
        case DTypeCategory::INT:
        case DTypeCategory::QUANTIZED:
            compute_type = CUDNN_DATA_INT32;
            break;
        default:
            megdnn_throw("unsupported data type for conv bias");
    }
    if (data_type.enumv() == DTypeEnum::Float16) {
        auto comp_mode = param.compute_mode;
        compute_type = get_compute_type_fp16(comp_mode);
    }
    cudnn_check(cudnnSetConvolution2dDescriptor(
            conv_desc, param.pad_h, param.pad_w, param.stride_h, param.stride_w,
            param.dilate_h, param.dilate_w, mode, compute_type));

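    // SIGMOID and H_SWISH have no fused cuDNN activation mode, so the
    // descriptor is set to identity and the nonlinearity is expected to be
    // applied by a separate elementwise pass after the convolution.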
    switch (param.nonlineMode) {
        case Param::NonlineMode::IDENTITY:
        case Param::NonlineMode::SIGMOID:
        case Param::NonlineMode::H_SWISH:
            cudnn_check(cudnnSetActivationDescriptor(
                    act_desc, CUDNN_ACTIVATION_IDENTITY, CUDNN_NOT_PROPAGATE_NAN, 0));
            break;
        case Param::NonlineMode::RELU:
            cudnn_check(cudnnSetActivationDescriptor(
                    act_desc, CUDNN_ACTIVATION_RELU, CUDNN_NOT_PROPAGATE_NAN, 0));
            break;
        default:
            megdnn_throw("unsupported nonlinear mode");
    }
#endif
}

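// Minimal usage sketch for the fused path (hypothetical caller; handle,
// x_desc, w_ptr, algo and the other names are illustrative, not part of
// this file):
//
//   ConvBiasDesc desc;
//   desc.set_conv_bias(x_layout.dtype, param, group);
//   cudnn_check(cudnnConvolutionBiasActivationForward(
//           handle, &alpha, x_desc, x_ptr, w_desc, w_ptr, desc.conv_desc,
//           algo, workspace_ptr, workspace_size, &beta, z_desc, z_ptr,
//           b_desc, b_ptr, desc.act_desc, y_desc, y_ptr));
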
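//! Set up only the convolution descriptor, with no fused activation.
//! Grouped convolution requires cuDNN >= 7, and the overload of
//! cudnnSetConvolution2dDescriptor taking a compute type requires
//! cuDNN >= 6, hence the version guards below.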
void ConvBiasDesc::set_conv(
        DType data_type, const param::ConvBias& param, const size_t nr_group) {
    using Param = param::ConvBias;
    cudnnConvolutionMode_t mode;
    switch (param.mode) {
        case Param::Mode::CROSS_CORRELATION:
            mode = CUDNN_CROSS_CORRELATION;
            break;
        case Param::Mode::CONVOLUTION:
            mode = CUDNN_CONVOLUTION;
            break;
        default:
            megdnn_throw("conv mode must be conv or xcorr.");
    }
    cudnnDataType_t compute_type;
    MEGDNN_MARK_USED_VAR(compute_type);  // unused when CUDNN_MAJOR < 6
    if (data_type.enumv() == DTypeEnum::Float32) {
        // FLOAT_CONFIG
        compute_type = CUDNN_DATA_FLOAT;
    } else if (data_type.enumv() == DTypeEnum::Float16) {
        auto comp_mode = param.compute_mode;
        compute_type = get_compute_type_fp16(comp_mode);
#if CUDNN_MAJOR >= 7
    } else if (
            data_type.category() == DTypeCategory::INT ||
            data_type.category() == DTypeCategory::QUANTIZED) {
        compute_type = CUDNN_DATA_INT32;
#endif
    } else {
        megdnn_throw("unsupported data type for conv bias");
    }
#if CUDNN_MAJOR >= 7
    cudnn_check(cudnnSetConvolutionGroupCount(conv_desc, nr_group));
#else
    megdnn_assert(nr_group == 1);
#endif

#if CUDNN_MAJOR >= 6
    cudnn_check(cudnnSetConvolution2dDescriptor(
            conv_desc, param.pad_h, param.pad_w, param.stride_h, param.stride_w,
            param.dilate_h, param.dilate_w, mode, compute_type));
#else
    cudnn_check(cudnnSetConvolution2dDescriptor(
            conv_desc, param.pad_h, param.pad_w, param.stride_h, param.stride_w,
            param.dilate_h, param.dilate_w, mode));
#endif
}

namespace conv_bias {

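//! Whether cuDNN can run the given conv-bias problem at all: 1-bit and
//! 4-bit quantized dtypes, BFloat16, Tegra K1 and formats other than
//! NCHW/NHWC/NCHW4/NCHW32 are rejected; grouped convolution needs
//! cuDNN >= 7 and dilated convolution cuDNN >= 7.5.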
bool is_cudnn_supported(const BiasForwardSizeArgs& args) {
    if (args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS1)
        return false;

    if ((args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS4 ||
         args.src_layout->dtype.enumv() == DTypeEnum::Quantized4Asymm) &&
        args.filter_layout->dtype.enumv() == DTypeEnum::QuantizedS4)
        return false;

    if (args.src_layout->dtype == args.filter_layout->dtype &&
        args.src_layout->dtype == dtype::BFloat16()) {
        return false;
    }

    // cuDNN reports CUDNN_STATUS_EXECUTION_FAILED on Tegra K1, so it is
    // disabled on that platform.
    if (args.handle->is_tegra_k1())
        return false;

    if (args.filter_meta.format == param::Convolution::Format::NCHW4 ||
        args.filter_meta.format == param::Convolution::Format::NCHW32) {
        if (args.dst_layout->dtype.enumv() != DTypeEnum::Int8 &&
            args.dst_layout->dtype.enumv() != DTypeEnum::QuantizedS8) {
            return false;
        }
    } else if (
            args.filter_meta.format != param::Convolution::Format::NCHW &&
            args.filter_meta.format != param::Convolution::Format::NHWC) {
        return false;
    }
    auto& fm = args.filter_meta;
    bool supported = true;
    supported &= (fm.spatial_ndim == 2);
#if CUDNN_VERSION < 7000
    supported &= (fm.group == 1);
#endif
#if CUDNN_VERSION < 7500
    supported &= (fm.dilation[0] == 1 && fm.dilation[1] == 1);
#endif
    return supported;
}

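//! Workspace sizes for the matmul-based conv-bias implementation: a buffer
//! the size of the destination, the im2col-expanded input matrix with
//! IC * FH * FW * OH * OW * N elements, and, when the filter must be
//! flipped (true convolution rather than cross-correlation), a copy of
//! the filter.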
SmallVector<size_t> matmul_get_workspace_bundle(const BiasForwardSizeArgs& args) {
    auto dtype = args.src_layout->dtype;
    auto&& fm = args.filter_meta;
    megdnn_assert(fm.group == 1);
    auto N = args.src_layout->shape[0];
    auto OC = fm.ocpg, IC = fm.icpg, FH = fm.spatial[0], FW = fm.spatial[1];
    auto OH = args.dst_layout->shape[2], OW = args.dst_layout->shape[3];
    SmallVector<size_t> sizes{
            dtype.size() * args.dst_layout->total_nr_elems(),
            dtype.size() * IC * FH * FW * OH * OW * N};
    if (args.filter_meta.should_flip) {
        sizes.push_back(dtype.size() * OC * IC * FH * FW);
    }
    return sizes;
}

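//! Write a spatially flipped (180-degree rotated) copy of the filter into
//! the workspace and retarget ref_ptr at it, turning a cross-correlation
//! kernel into a true convolution. The flip is performed by a relayout
//! with negated spatial strides.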
void flip_filter(
        const BiasForwardSizeArgs& args, const Workspace& workspace, RefPtr& ref_ptr) {
    auto&& fm = args.filter_meta;
    megdnn_assert(fm.group == 1 && fm.spatial_ndim == 2);
    auto OC = fm.ocpg, IC = fm.icpg, FH = fm.spatial[0], FW = fm.spatial[1];
    auto dtype = fm.dtype;
    megdnn_assert(workspace.size >= dtype.size() * OC * IC * FH * FW);

    TensorND src{{{OC, IC, FH, FW}, dtype}, ref_ptr},
            dst{workspace.raw_ptr + (FH * FW - 1) * dtype.size(), src.layout};
    dst.layout.stride[2] = -dst.layout.stride[2];
    dst.layout.stride[3] = -dst.layout.stride[3];
    args.handle->relayout_opr()->exec(src, dst);
    ref_ptr.reset(workspace.raw_ptr);
}

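//! Compute the (alpha, beta) scaling pair for
//! cudnnConvolutionBiasActivationForward, i.e. y = alpha * conv(x, w) +
//! beta * z. When x and w are quantized, alpha = scale(x) * scale(w),
//! further divided by scale(y) if y is quantized, and beta =
//! scale(z) / scale(y) when a quantized z is given; otherwise alpha is 1
//! and beta is 1 if a z tensor is present, 0 if not.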
std::pair<float, float> cudnn_get_conv_bias_act_scale_param(
        const TensorLayout& x, const TensorLayout& y, const TensorLayout& w,
        const TensorLayout& b, const TensorLayout& z) {
    float alpha = 1.f, beta = 0.f;
    if (z.ndim > 0)
        beta = 1.f;

    auto get_scale = [](const DType& dtype) -> float {
        megdnn_assert(dtype.category() == DTypeCategory::QUANTIZED);
        switch (dtype.enumv()) {
#define cb(_dt)                  \
    case DTypeTrait<_dt>::enumv: \
        return dtype.param<_dt>().scale;
            MEGDNN_FOREACH_QUANTIZED_DTYPE(cb)
#undef cb
            default:
                megdnn_assert_internal(0);
        }
    };

    auto x_dtype = x.dtype, y_dtype = y.dtype, w_dtype = w.dtype;
    megdnn_assert(
            (x_dtype.category() == y_dtype.category()) ||
            (x_dtype.enumv() == DTypeEnum::QuantizedS8 &&
             y_dtype.enumv() == DTypeEnum::Float32));
    megdnn_assert(x_dtype.category() == w_dtype.category());

    if (x_dtype.category() == DTypeCategory::QUANTIZED) {
        auto expected_bias_scale = get_scale(x_dtype) * get_scale(w_dtype);
        alpha = expected_bias_scale;
        if (y_dtype.category() == DTypeCategory::QUANTIZED)
            alpha /= get_scale(y_dtype);
        if (z.ndim > 0 && z.dtype.category() == DTypeCategory::QUANTIZED) {
            beta = get_scale(z.dtype) / get_scale(y_dtype);
        }
        if (b.dtype.category() == DTypeCategory::QUANTIZED) {
            megdnn_assert(fabs(expected_bias_scale - get_scale(b.dtype)) < 1e-4);
        }
    }
    return {alpha, beta};
}

#if CUDNN_VERSION >= 7500
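//! Reorder an NCHW32 filter (and optionally its bias) into the layout the
//! tensor-core IMMA kernels expect, using cudnnReorderFilterAndBias; the
//! API is guarded here for cuDNN >= 7.5.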
void cudnn_reorder_filter_and_bias_nchw32(
        const cudnnHandle_t& handle, const void* filter_ptr,
        const CanonizedFilterMeta& fm, const void* bias_ptr, void* reordered_filter_ptr,
        void* reordered_bias_ptr) {
    FilterDesc<param::ConvBias> filter_desc;
    filter_desc.set(fm);
    int reorder_bias = bias_ptr != nullptr;
    cudnn_check(cudnnReorderFilterAndBias(
            handle, filter_desc.desc, CUDNN_DEFAULT_REORDER, filter_ptr,
            reordered_filter_ptr, reorder_bias, bias_ptr, reordered_bias_ptr));
}
#endif

}  // namespace conv_bias
}  // namespace cuda
}  // namespace megdnn

// vim: syntax=cpp.doxygen