Commit 4abf7bd3 authored by Megvii Engine Team, committed by huangxinda

refactor(dnn/cuda): refactor kernel generator for cutlass convolution kernels

GitOrigin-RevId: 7882f9c68c0e0f35159f12a07ccdf027f4540ff1
Parent b4687ce8
@@ -37,14 +37,13 @@ all: ${PARAM_DEFS} ${ELEMWISE_IMPL} ${CUDA_CONV_IMPL} $(CUDA_MATMUL_IMPL)
 ../src/cuda/elemwise_multi_type/kimpl: gen_elemwise_multi_type_kern_impls.py
 	./$^ --type cuda $@
-../src/cuda/conv_bias/int8/kimpl: gen_cuda_conv_bias_kern_impls.py gen_cutlass_conv_bias_kern_impls.py cutlass_generator/generator.py
+../src/cuda/conv_bias/int8/kimpl: gen_cuda_conv_bias_kern_impls.py cutlass_generator
 	./gen_cuda_conv_bias_kern_impls.py --type dp4a $@
-	./gen_cutlass_conv_bias_kern_impls.py --type dp4a $@
+	python3 ./cutlass_generator/generator.py --operations all --type simt $@
-../src/cuda/conv_bias/int8_imma/kimpl: gen_cuda_conv_bias_kern_impls.py gen_cutlass_conv_bias_kern_impls.py
+../src/cuda/conv_bias/int8_imma/kimpl: gen_cuda_conv_bias_kern_impls.py cutlass_generator
 	./gen_cuda_conv_bias_kern_impls.py --type imma $@
-	./gen_cutlass_conv_bias_kern_impls.py --type imma $@
+	python3 ./cutlass_generator/generator.py --operations conv2d --type tensorop8816 $@
 ../src/cuda/batch_conv_bias/int8/kimpl: gen_cuda_batch_conv_bias_kern_impls.py
 	./$^ --type dp4a $@
......
@@ -807,9 +807,9 @@ void megdnn::cuda::cutlass_wrapper::
         const int32_t* d_bias, const uint8_t* d_z, uint8_t* d_dst,
         int* workspace, const convolution::ConvParam& param,
         uint32_t nonlinear_mode, float alpha, float beta, float gamma,
-        float delta, float theta, float scale, uint8_t src_zero_point,
-        const GemmCoord& threadblock_shape, const GemmCoord& warp_shape,
-        cudaStream_t stream) {
+        float delta, float theta, float /* scale */,
+        uint8_t src_zero_point, const GemmCoord& threadblock_shape,
+        const GemmCoord& warp_shape, cudaStream_t stream) {
 #define DISPATCH_KERNEL_WITH_TILE_SHAPE(threadblock_m_, threadblock_n_, \
                                         threadblock_k_, warp_m_, warp_n_, \
                                         warp_k_) \
@@ -878,15 +878,6 @@ void megdnn::cuda::cutlass_wrapper::
                                                  0, delta, theta};
             DISPATCH_KERNEL;
         }
-        case NonlineMode::H_SWISH: {
-            using EpilogueOp = cutlass::epilogue::thread::
-                    BiasAddLinearCombinationHSwishClamp<
-                            ElementOutput, 16, ElementAccumulator, ElementBias,
-                            ElementCompute>;
-            typename EpilogueOp::Params epilogue{alpha, beta, gamma,
-                                                 scale, delta, theta};
-            DISPATCH_KERNEL;
-        }
         default:
             megdnn_assert(false,
                           "unsupported nonlinear mode for conv bias operator");
@@ -960,8 +951,7 @@ void megdnn::cuda::cutlass_wrapper::
             ThreadBlockShape, WarpShape, InstructionShape, EpilogueOp, \
             cutlass::conv::threadblock:: \
                     ConvolutionFpropNCxHWxThreadblockSwizzle, \
-            stages_, 4, aligned_, true, \
-            cutlass::arch::OpMultiplyAddSaturate>; \
+            stages_, 4, aligned_, true, cutlass::arch::OpMultiplyAdd>; \
             typename Convolution::ConvolutionParameter conv_param( \
                     param.n, param.hi, param.wi, param.ci, param.co, param.fh, \
                     param.fw, param.ho, param.wo, param.ph, param.pw, param.sh, \
......
/**
 * \file
 * dnn/src/cuda/conv_bias/int4/conv_bias_int4_implicit_gemm_cutlass_wrapper.cuinl
 *
 * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
 *
 * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied.
 */
#include "cutlass/convolution/device/convolution.h"
#include "src/cuda/conv_bias/cutlass_convolution_wrapper.cuh"
using namespace megdnn;
using namespace cuda;
using namespace cutlass_wrapper;
template <typename Convolution>
void megdnn::cuda::cutlass_wrapper::cutlass_convolution_wrapper(
const typename Convolution::ElementSrc* d_src,
const typename Convolution::ElementFilter* d_filter,
const typename Convolution::ElementBias* d_bias,
const typename Convolution::ElementDst* d_z,
typename Convolution::ElementDst* d_dst, int* workspace,
typename Convolution::ConvolutionParameter const& conv_param,
typename Convolution::EpilogueOutputOp::Params const& epilogue,
cudaStream_t stream, typename Convolution::ExtraParam extra_param) {
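    // Wrap the raw device pointers in packed-layout TensorRefs. CUTLASS
    // convolution naming: N/H/W/C = batch, input spatial, input channels;
    // K/R/S = output channels, filter spatial; P/Q = output spatial.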
typename Convolution::TensorRefSrc tensor_src{
const_cast<typename Convolution::ElementSrc*>(d_src),
Convolution::LayoutSrc::packed(
{conv_param.N, conv_param.H, conv_param.W, conv_param.C})};
typename Convolution::TensorRefFilter tensor_filter{
const_cast<typename Convolution::ElementFilter*>(d_filter),
Convolution::LayoutFilter::packed(
{conv_param.K, conv_param.R, conv_param.S, conv_param.C})};
typename Convolution::TensorRefBias tensor_bias{
const_cast<typename Convolution::ElementBias*>(d_bias),
Convolution::LayoutBias::packed({1, 1, 1, conv_param.K})};
typename Convolution::TensorRefDst tensor_z{
const_cast<typename Convolution::ElementDst*>(d_z),
Convolution::LayoutDst::packed(
{conv_param.N, conv_param.P, conv_param.Q, conv_param.K})};
typename Convolution::TensorRefDst tensor_dst{
d_dst,
Convolution::LayoutDst::packed(
{conv_param.N, conv_param.P, conv_param.Q, conv_param.K})};
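    // Bundle the problem description, tensor refs, and epilogue parameters
    // into the device-level operator's argument struct.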
typename Convolution::Arguments arguments{conv_param,
tensor_src.non_const_ref(),
tensor_filter.non_const_ref(),
tensor_bias.non_const_ref(),
tensor_z.non_const_ref(),
tensor_dst.non_const_ref(),
epilogue,
{},
{},
extra_param};
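    // initialize() validates the problem and binds the arguments (plus the
    // scratch workspace); the call operator enqueues the kernel on stream.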
Convolution conv_op;
cutlass_check(conv_op.initialize(arguments, workspace));
cutlass_check(conv_op(stream));
after_kernel_launch();
}
// vim: syntax=cuda.doxygen
../implicit_gemm_conv_bias_cutlass_wrapper.cuinl
\ No newline at end of file
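The .cuinl above keeps all of the argument marshalling and launch logic in one function template, so each generated kimpl .cu file only needs to instantiate it for a single concrete Convolution type. Below is a minimal, self-contained C++ sketch of that wrapper-plus-explicit-instantiation pattern; the run_op and MyConv names are hypothetical stand-ins, not MegEngine's actual types.

#include <cstdio>

// Generic wrapper: the marshalling and launch sequence live here, once.
template <typename Op>
void run_op(typename Op::Param const& param) {
    Op op;
    op.initialize(param);  // mirrors conv_op.initialize(arguments, workspace)
    op();                  // mirrors conv_op(stream)
}

// A generated file supplies one concrete operation type...
struct MyConv {
    struct Param {
        int n;  // stands in for the full ConvolutionParameter
    };
    int n = 0;
    void initialize(Param const& param) { n = param.n; }
    void operator()() { std::printf("launching conv for batch %d\n", n); }
};

// ...plus a single explicit instantiation, keeping each compilation unit tiny.
template void run_op<MyConv>(MyConv::Param const&);

int main() {
    run_op<MyConv>({/* n = */ 4});
    return 0;
}

Concentrating the marshalling in one template means the generator only has to emit type aliases and instantiations per kernel, which is presumably what lets the single cutlass_generator/generator.py replace the per-type gen_cutlass_conv_bias_kern_impls.py scripts in the Makefile hunk above.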