From 14b65e4da7d716c34425241ee7e1ec4a5d08fd72 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Mon, 10 May 2021 19:13:59 +0800 Subject: [PATCH] feat(dnn/cuda): add reduce_filter_and_update_bias GitOrigin-RevId: 31b6e6b0abe2790029e63c9f91c64290a1801958 --- dnn/src/cuda/conv_bias/quint4x4x32_wmma.cpp | 2 +- ..._with_scale_filter.cu => reduce_filter.cu} | 65 ++++++++++++++++++- ...ith_scale_filter.cuh => reduce_filter.cuh} | 16 ++++- 3 files changed, 77 insertions(+), 6 deletions(-) rename dnn/src/cuda/conv_bias/{reduce_with_scale_filter.cu => reduce_filter.cu} (67%) rename dnn/src/cuda/conv_bias/{reduce_with_scale_filter.cuh => reduce_filter.cuh} (87%) diff --git a/dnn/src/cuda/conv_bias/quint4x4x32_wmma.cpp b/dnn/src/cuda/conv_bias/quint4x4x32_wmma.cpp index 3dab44629..4a464fa16 100644 --- a/dnn/src/cuda/conv_bias/quint4x4x32_wmma.cpp +++ b/dnn/src/cuda/conv_bias/quint4x4x32_wmma.cpp @@ -15,7 +15,7 @@ #include "./quint4x4x32_wmma/activation_u4.cuh" #include "./quint4x4x32_wmma/reduce_with_scale_data.cuh" -#include "./reduce_with_scale_filter.cuh" +#include "./reduce_filter.cuh" #include "./quint4x4x32_wmma/wmma_conv_integer_u4.cuh" using namespace megdnn; diff --git a/dnn/src/cuda/conv_bias/reduce_with_scale_filter.cu b/dnn/src/cuda/conv_bias/reduce_filter.cu similarity index 67% rename from dnn/src/cuda/conv_bias/reduce_with_scale_filter.cu rename to dnn/src/cuda/conv_bias/reduce_filter.cu index 0fa352bf4..f03594303 100644 --- a/dnn/src/cuda/conv_bias/reduce_with_scale_filter.cu +++ b/dnn/src/cuda/conv_bias/reduce_filter.cu @@ -25,7 +25,7 @@ * **************************************************************************************************/ /** - * \file dnn/src/cuda/conv_bias/reduce_with_scale_filter.cu + * \file dnn/src/cuda/conv_bias/reduce_filter.cu * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") * * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. @@ -36,9 +36,11 @@ * implied. */ -#include "./reduce_with_scale_filter.cuh" -#include "src/cuda/reduce_helper.cuh" +#include "./reduce_filter.cuh" +#include "src/cuda/utils.cuh" + #include "src/cuda/integer_subbyte_utils.cuh" +#include "src/cuda/reduce_helper.cuh" using namespace megdnn; using namespace cuda; @@ -76,6 +78,38 @@ struct ReduceWithScaleInt4Op { #endif }; +template +struct ReduceUpdateBiasInt4Op { + typedef int32_t wtype; + const uint8_t* filter; + const int32_t* src_bias; + int32_t* dst_bias; + int32_t zero_point; + static const wtype INIT = 0; + +#if MEGDNN_CC_CUDA + __host__ __device__ void write(uint32_t idx, wtype val) { + dst_bias[idx] = src_bias[idx] - val * zero_point; + } + + __host__ __device__ static wtype apply(wtype a, wtype b) { return a + b; } + + __device__ wtype read(uint32_t idx) { + constexpr uint32_t subbytes_per_pixel = 8; + const uint32_t* fptr = + (const uint32_t*)(filter + subbytes_per_pixel * idx / 2); + uint32_t val = *fptr; + int32_t ret = 0; +#pragma unroll + for (int j = 0; j < 8; j++) { + ret += integer_subbyte::unpack_integer_4bits(val, + (j << 2)); + } + return ret; + } +#endif +}; + } // namespace template @@ -106,6 +140,31 @@ INST(false); INST(true); #undef INST +template +void megdnn::cuda::do_dispatch_reduce_filter_and_update_bias_4bit( + const uint8_t* filter, const int32_t* src_bias, uint32_t rows, + uint32_t cols, int32_t* dst_bias, int32_t* workspace, + int32_t zero_point, cudaStream_t stream) { + ReduceUpdateBiasInt4Op op; + op.filter = filter; + op.src_bias = src_bias; + op.dst_bias = dst_bias; + op.zero_point = zero_point; + run_reduce, false>(workspace, rows, cols, + 1, stream, op); +} + +#define INST(signedness) \ + template void \ + megdnn::cuda::do_dispatch_reduce_filter_and_update_bias_4bit( \ + const uint8_t* filter, const int32_t* src_bias, uint32_t rows, \ + uint32_t cols, int32_t* dst_bias, int32_t* workspace, \ + int32_t zero_point, cudaStream_t stream) + +INST(false); +INST(true); +#undef INST + size_t megdnn::cuda::do_dispatch_reduce_workspace_in_bytes(size_t A, size_t B, size_t C) { return get_reduce_workspace_in_bytes>(A, B, C); diff --git a/dnn/src/cuda/conv_bias/reduce_with_scale_filter.cuh b/dnn/src/cuda/conv_bias/reduce_filter.cuh similarity index 87% rename from dnn/src/cuda/conv_bias/reduce_with_scale_filter.cuh rename to dnn/src/cuda/conv_bias/reduce_filter.cuh index 595b3765f..cd42ba6b2 100644 --- a/dnn/src/cuda/conv_bias/reduce_with_scale_filter.cuh +++ b/dnn/src/cuda/conv_bias/reduce_filter.cuh @@ -25,7 +25,7 @@ * **************************************************************************************************/ /** - * \file dnn/src/cuda/conv_bias/reduce_with_scale_filter.cuh + * \file dnn/src/cuda/conv_bias/reduce_filter.cuh * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") * * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. @@ -36,16 +36,28 @@ * implied. */ -#include "src/cuda/utils.cuh" +#include +#include + +#include namespace megdnn { namespace cuda { + template void do_dispatch_reduce_with_scale_filter_4bit(const uint8_t* src, int32_t scale, uint32_t rows, uint32_t cols, int32_t* dst, cudaStream_t stream); + +template +void do_dispatch_reduce_filter_and_update_bias_4bit( + const uint8_t* filter, const int32_t* src_bias, uint32_t rows, + uint32_t cols, int32_t* dst_bias, int32_t* workspace, int zero_point, + cudaStream_t stream); + size_t do_dispatch_reduce_workspace_in_bytes(size_t A, size_t B, size_t C); + } // namespace cuda } // namespace megdnn -- GitLab