/**
 * \file dnn/src/cuda/conv_bias/simple_int1.cpp
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 */

#include "src/common/algo_base.h"
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/handle.h"
#include "src/cuda/utils.cuh"
#include "src/cuda/utils.h"

using namespace megdnn;
using namespace cuda;
using namespace conv_bias;

namespace {

// Build the layouts and param for the float32 sub-operator: any QuantizedS1 /
// QuantizedS32 layout is replaced by Float32, and the compute mode is reset to
// DEFAULT so the inner convolution runs in float.
std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> sub_opr_config(
        const TensorLayoutArray& layouts, const ConvBiasForwardImpl* opr) {
    megdnn_assert(layouts.size() >= 3);
    std::pair<TensorLayoutArray, ConvBiasForwardImpl::Param> ret;
    ret.first = layouts;
    auto change_dtype = [](TensorLayout& layout) {
        if (layout.dtype.enumv() == DTypeEnum::QuantizedS1 ||
            layout.dtype.enumv() == DTypeEnum::QuantizedS32) {
            layout.dtype = dtype::Float32();
        }
    };
    change_dtype(ret.first[0]);
    change_dtype(ret.first[1]);
    change_dtype(ret.first[2]);
    change_dtype(ret.first[3]);
    change_dtype(ret.first[4]);

    ret.second = opr->param();
    ret.second.compute_mode = ConvBiasForwardImpl::Param::ComputeMode::DEFAULT;
    return ret;
}

std::pair<TensorLayoutArray, std::unique_ptr<ConvBiasForward>> prepare_sub_opr(
        const ConvBiasForwardImpl::AlgoBase::SizeArgs& args) {
    auto convbias_opr = args.handle->create_operator<ConvBiasForward>();
    auto&& config = sub_opr_config(
            {*args.src_layout, *args.filter_layout, *args.bias_layout, *args.z_layout,
             *args.dst_layout},
            args.opr);
    convbias_opr->param() = config.second;

    return {config.first, std::move(convbias_opr)};
}

}  // namespace

std::vector<Algorithm::SearchItem> ConvBiasForwardImpl::AlgoSimpleInt1::get_subopr_list(
        const TensorLayoutArray& layouts, const OperatorBase* opr) const {
    auto&& config =
            sub_opr_config(layouts, static_cast<const ConvBiasForwardImpl*>(opr));

    std::string param_str;
    Algorithm::serialize_write_pod(config.second, param_str);
    return {{Algorithm::OprType::CONVBIAS_FORWARD, param_str, config.first}};
}

bool ConvBiasForwardImpl::AlgoSimpleInt1::is_available(const SizeArgs& args) const {
    if (args.src_layout->dtype.valid() && args.filter_layout->dtype.valid() &&
        args.bias_layout->dtype.valid() && args.z_layout->dtype.valid() &&
        args.dst_layout->dtype.valid()) {
        auto config = prepare_sub_opr(args);

        // Only applicable when src and filter are both QuantizedS1 and the
        // float32 sub-operator can find an algorithm for the converted layouts.
        return args.src_layout->dtype.enumv() == args.filter_layout->dtype.enumv() &&
               args.src_layout->dtype.enumv() == DTypeEnum::QuantizedS1 &&
               get_algorithm(
                       static_cast<ConvBiasForwardImpl*>(config.second.get()),
                       config.first[0], config.first[1], config.first[2],
                       config.first[3], config.first[4]);
    } else {
        return false;
    }
}

WorkspaceBundle ConvBiasForwardImpl::AlgoSimpleInt1::get_workspace_bundle(
        void* ptr, const SizeArgs& args) const {
    auto config = prepare_sub_opr(args);
    SmallVector<size_t> sizes;
    // One scratch buffer per tensor whose dtype differs from the converted
    // (float32) layout, followed by the sub-operator's own workspace.
    auto get_workspace = [&sizes](const TensorLayout& src, const TensorLayout& dst) {
        if (src.dtype != dst.dtype) {
            sizes.push_back(dst.span().dist_byte());
        }
    };
    get_workspace(*args.src_layout, config.first[0]);
    get_workspace(*args.filter_layout, config.first[1]);
    get_workspace(*args.bias_layout, config.first[2]);
    get_workspace(*args.z_layout, config.first[3]);
    get_workspace(*args.dst_layout, config.first[4]);
    sizes.push_back(config.second->get_workspace_in_bytes(
            config.first[0], config.first[1], config.first[2], config.first[3],
            config.first[4], nullptr));
    return {ptr, std::move(sizes)};
}

size_t ConvBiasForwardImpl::AlgoSimpleInt1::get_workspace_in_bytes(
        const SizeArgs& args) const {
    return get_workspace_bundle(nullptr, args).total_size_in_bytes();
}

void ConvBiasForwardImpl::AlgoSimpleInt1::exec(const ExecArgs& args) const {
    TensorND fsrc_tensor = *args.src_tensor;
    TensorND ffilter_tensor = *args.filter_tensor;
    TensorND fbias_tensor = *args.bias_tensor;
    TensorND fz_tensor = *args.z_tensor;
    TensorND fdst_tensor = *args.dst_tensor;

    auto config = prepare_sub_opr(args);

    auto bundle = get_workspace_bundle(args.workspace.raw_ptr, args);
    // Convert QuantizedS1 src/filter to float32 using the first two bundle slots.
    CompTypeCvter<dtype::QuantizedS1, dtype::Float32> cvter(args.handle, &bundle);
    {
        cvter.src_to_comp_type(*args.src_tensor, fsrc_tensor)
                .src_to_comp_type(*args.filter_tensor, ffilter_tensor);
    }

    // The remaining slots hold the float32 copies of bias/z/dst plus the
    // sub-operator workspace.
    WorkspaceBundle dst_bundle = {
            bundle.get(2),
            {bundle.get_size(2), bundle.get_size(3), bundle.get_size(4),
             bundle.get_size(5)}};
    CompTypeCvter<dtype::QuantizedS32, dtype::Float32> dst_cvter(
            args.handle, &dst_bundle);
    {
        dst_cvter.src_to_comp_type(*args.bias_tensor, fbias_tensor)
                .src_to_comp_type(*args.z_tensor, fz_tensor)
                .src_to_comp_type(*args.dst_tensor, fdst_tensor);
    }

    config.second->exec(
            fsrc_tensor, ffilter_tensor, fbias_tensor, fz_tensor, fdst_tensor, nullptr,
            dst_cvter.workspace());
    {
        dst_cvter.comp_to_dst_type(fdst_tensor, *args.dst_tensor);
    }
}

// vim: syntax=cpp.doxygen