/**
 * \file dnn/src/cuda/conv_bias/bfloat16.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/handle.h"
#include "src/cuda/utils.cuh"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;
using namespace conv_bias;

22 23 24 25 26 27
namespace {
std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> sub_opr_config(
        const TensorLayoutArray& layouts, const ConvBiasForwardImpl* opr) {
    megdnn_assert(layouts.size() >= 3);
    std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> ret;
    ret.first = layouts;
28 29 30 31 32
    auto change_dtype = [](TensorLayout& layout) {
        if (layout.dtype == dtype::BFloat16()) {
            layout.dtype = dtype::Float32();
        }
    };
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
    change_dtype(ret.first[0]);
    change_dtype(ret.first[1]);
    change_dtype(ret.first[2]);
    change_dtype(ret.first[3]);
    change_dtype(ret.first[4]);

    ret.second = opr->param();
    ret.second.compute_mode = ConvBiasForwardImpl::Param::ComputeMode::DEFAULT;
    return ret;
}
}  // namespace

//! List the single sub-operator this algo dispatches to: a float32
//! CONVBIAS_FORWARD with the bfloat16 layouts converted by sub_opr_config.
std::vector<Algorithm::SearchItem>
ConvBiasForwardImpl::AlgoBFloat16::get_subopr_list(
        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
    auto&& config = sub_opr_config(
            layouts, static_cast<const ConvBiasForwardImpl*>(opr));

    // Serialize the sub-operator param so the search item can be keyed on it.
    std::string param_str;
    Algorithm::serialize_write_pod(config.second, param_str);
    return {{Algorithm::OprType::CONVBIAS_FORWARD, param_str, config.first}};
}

bool ConvBiasForwardImpl::AlgoBFloat16::is_available(
        const SizeArgs& args) const {
    auto convbias_opr = args.handle->create_operator<ConvBias>();
59 60 61 62 63 64
    auto&& config = sub_opr_config(
            {*args.src_layout, *args.filter_layout, *args.bias_layout,
             *args.z_layout, *args.dst_layout},
            args.opr);
    convbias_opr->param() = config.second;

65 66
    return args.src_layout->dtype == args.filter_layout->dtype &&
           args.src_layout->dtype == dtype::BFloat16() &&
67 68 69
           get_algorithm(static_cast<ConvBiasForwardImpl*>(convbias_opr.get()),
                         config.first[0], config.first[1], config.first[2],
                         config.first[3], config.first[4]);
70 71 72 73 74
}

//! Workspace layout: one fp32 conversion buffer per tensor whose dtype
//! differs from its converted layout, followed by the sub-operator's own
//! workspace.
WorkspaceBundle ConvBiasForwardImpl::AlgoBFloat16::get_workspace_bundle(
        void* ptr, const SizeArgs& args) const {
    auto convbias_opr = args.handle->create_operator<ConvBias>();
    // Forward the previously-selected sub-algorithm so workspace size matches
    // what exec() will actually run.
    if (args.opr->execution_policy().algo.valid()) {
        megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
        convbias_opr->execution_policy() =
                args.opr->execution_policy().sub_policy[0];
    }
    auto&& config = sub_opr_config(
            {*args.src_layout, *args.filter_layout, *args.bias_layout,
             *args.z_layout, *args.dst_layout},
            args.opr);
    convbias_opr->param() = config.second;

    SmallVector<size_t> sizes;
    // A conversion buffer is only needed when the dtype actually changes
    // (i.e. the original tensor is bfloat16).
    auto get_workspace = [&sizes](const TensorLayout& src,
                                  const TensorLayout& dst) {
        if (src.dtype != dst.dtype) {
            sizes.push_back(dst.span().dist_byte());
        }
    };
    get_workspace(*args.src_layout, config.first[0]);
    get_workspace(*args.filter_layout, config.first[1]);
    get_workspace(*args.bias_layout, config.first[2]);
    get_workspace(*args.z_layout, config.first[3]);
    get_workspace(*args.dst_layout, config.first[4]);
    sizes.push_back(convbias_opr->get_workspace_in_bytes(
            config.first[0], config.first[1], config.first[2], config.first[3],
            config.first[4], nullptr));

    return {ptr, std::move(sizes)};
}

//! Total workspace: conversion buffers plus the sub-operator's workspace.
size_t ConvBiasForwardImpl::AlgoBFloat16::get_workspace_in_bytes(
        const SizeArgs& args) const {
    auto bundle = get_workspace_bundle(nullptr, args);
    return bundle.total_size_in_bytes();
}

void ConvBiasForwardImpl::AlgoBFloat16::exec(const ExecArgs& args) const {
    TensorND fsrc_tensor = *args.src_tensor;
    TensorND ffilter_tensor = *args.filter_tensor;
    TensorND fbias_tensor = *args.bias_tensor;
    TensorND fz_tensor = *args.z_tensor;
    TensorND fdst_tensor = *args.dst_tensor;
    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
    CompTypeCvter<dtype::BFloat16, dtype::Float32> cvter(args.handle, &bundle);
    {
        cvter.src_to_comp_type(*args.src_tensor, fsrc_tensor)
                .src_to_comp_type(*args.filter_tensor, ffilter_tensor)
                .src_to_comp_type(*args.bias_tensor, fbias_tensor)
                .src_to_comp_type(*args.z_tensor, fz_tensor)
                .src_to_comp_type(*args.dst_tensor, fdst_tensor);
    }
    {
        auto convbias_opr = args.handle->create_operator<ConvBias>();
        convbias_opr->param() = args.opr->param();
        convbias_opr->param().compute_mode = Param::ComputeMode::DEFAULT;
129 130 131 132 133 134
        if (args.opr->execution_policy().algo.valid()) {
            megdnn_assert(args.opr->execution_policy().sub_policy.size() == 1);
            convbias_opr->execution_policy() =
                    args.opr->execution_policy().sub_policy[0];
        }

135
        convbias_opr->exec(fsrc_tensor, ffilter_tensor, fbias_tensor, fz_tensor,
136
                           fdst_tensor, nullptr, cvter.workspace());
137 138 139 140 141
    }
    { cvter.comp_to_dst_type(fdst_tensor, *args.dst_tensor); }
}

// vim: syntax=cpp.doxygen