From 14e9ad625d9b54765ff1e4a245e6abbd10d3dd8b Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Wed, 6 Apr 2022 20:56:31 +0800 Subject: [PATCH] fix(megdnn): emit define-but-not-referenced and extra-;-ignored warning on cuda9.0~cuda9.1 GitOrigin-RevId: f6db42e3958499c214bf3fc6f15ed7e8d8321da3 --- dnn/include/megdnn/arch.h | 26 ++++++++--------- dnn/include/megdnn/dtype.h | 12 ++++---- dnn/include/megdnn/thin/small_vector.h | 2 +- dnn/src/common/basic_types.cpp | 4 +-- dnn/src/common/elemwise_helper.cuh | 4 +-- dnn/src/common/named_tensor.cpp | 2 +- dnn/src/cuda/argsort/argsort.cu | 2 ++ dnn/src/cuda/checksum/kern.cu | 2 ++ dnn/src/cuda/conv_bias/chanwise/kern.cuh | 6 ++-- .../conv_bias/matmul/inplace_matmul_impl.cu | 2 ++ .../reduce_with_scale_data.cu | 4 ++- dnn/src/cuda/conv_bias/reduce_filter.cu | 2 ++ .../backward_filter/inplace_matmul_impl.cu | 2 ++ .../forward/inplace_matmul_impl.cu | 2 ++ dnn/src/cuda/convolution_helper/config.cuh | 4 +-- .../conv_trait/ibatch_conv_trait.cuh | 6 ++-- .../conv_trait/iconv_imma_trait.cuh | 10 +++---- .../conv_trait/iconv_trait.cuh | 4 +-- .../global_memory_visitor_cixhw.cuh | 2 +- .../global_memory_visitor_imma_cixn.cuh | 2 +- .../global_memory_visitor_imma_cixwixn.cuh | 2 +- .../global_memory_visitor_imma_cixwoxn.cuh | 2 +- .../global_memory_visitor_imma_fwxco.cuh | 2 +- dnn/src/cuda/cumsum/kern_impl.cu | 2 ++ dnn/src/cuda/elemwise/special_kerns.inl | 2 +- .../kernel_common/diagnostic_epilogue.cuh | 24 +++++++++++++++ .../kernel_common/diagnostic_prologue.cuh | 29 +++++++++++++++++++ .../local_share_bwd_data_f32_implicit_gemm.cu | 2 ++ ...ocal_share_bwd_filter_f32_implicit_gemm.cu | 2 ++ ...cal_share_fwd_chwn_f32_batch_size_aware.cu | 4 ++- ...d_chwn_f32_batch_size_aware_small_image.cu | 2 ++ .../uint4x4x32_wmma/wmma_matrix_mul_u4.cu | 2 +- dnn/src/cuda/pooling/pooling2d_qint.cu | 7 +++-- dnn/src/cuda/query_blocksize_impl.cu | 2 ++ .../cuda/relayout_format/relayout_format.cu | 5 +++- .../relayout_format/relayout_format_kern.cuh | 6 ++-- .../relayout_format/relayout_format_utils.cuh | 2 +- dnn/src/cuda/resize/forward.cu | 2 ++ dnn/src/cuda/type_cvt/kern.cu | 2 ++ dnn/src/cuda/warp_affine/warp_affine.cu | 2 ++ dnn/src/cuda/warp_perspective/forward.cu | 2 ++ dnn/src/naive/relayout_format/opr_impl.cpp | 2 +- src/opr/test/basic_arith/elemwise.cpp | 4 +-- 43 files changed, 152 insertions(+), 59 deletions(-) create mode 100644 dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh create mode 100644 dnn/src/cuda/kernel_common/diagnostic_prologue.cuh diff --git a/dnn/include/megdnn/arch.h b/dnn/include/megdnn/arch.h index ebaa59f8c..ce6f76cbc 100644 --- a/dnn/include/megdnn/arch.h +++ b/dnn/include/megdnn/arch.h @@ -36,15 +36,15 @@ #define MEGDNN_ALWAYS_INLINE inline __attribute__((__always_inline__)) #endif -#define MEGDNN_DEPRECATED __attribute__((deprecated)) -#define MEGDNN_PACKED __attribute__((packed)) -#define MEGDNN_CONSTEXPR constexpr -#define MEGDNN_NOEXCEPT noexcept -#define MEGDNN_STATIC_ASSERT static_assert -#define MEGDNN_FINAL final -#define MEGDNN_NORETURN __attribute__((noreturn)) -#define MEGDNN_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) -#define MEGDNN_ATTRIBUTE_TARGET(simd) __attribute__((target(simd))) +#define MEGDNN_DEPRECATED __attribute__((deprecated)) +#define MEGDNN_PACKED __attribute__((packed)) +#define MEGDNN_CONSTEXPR constexpr +#define MEGDNN_NOEXCEPT noexcept +#define MEGDNN_STATIC_ASSERT(cond, msg) static_assert(cond, msg); +#define MEGDNN_FINAL final +#define MEGDNN_NORETURN __attribute__((noreturn)) +#define 
MEGDNN_WARN_UNUSED_RESULT __attribute__((warn_unused_result)) +#define MEGDNN_ATTRIBUTE_TARGET(simd) __attribute__((target(simd))) #if defined(__clang_major__) && (__clang_major__ >= 7) #define MEGDNN_LAMBDA_ATTRIBUTE_TARGET(simd) __attribute__((target(simd))) #else @@ -64,10 +64,10 @@ #define MEGDNN_DEPRECATED #define MEGDNN_PACKED -#define MEGDNN_CONSTEXPR constexpr -#define MEGDNN_NOEXCEPT noexcept -#define MEGDNN_STATIC_ASSERT static_assert -#define MEGDNN_FINAL final +#define MEGDNN_CONSTEXPR constexpr +#define MEGDNN_NOEXCEPT noexcept +#define MEGDNN_STATIC_ASSERT(cond, msg) static_assert(cond, msg); +#define MEGDNN_FINAL final #if defined(_MSC_VER) #define MEGDNN_NORETURN __declspec(noreturn) diff --git a/dnn/include/megdnn/dtype.h b/dnn/include/megdnn/dtype.h index 7a8c876bd..7b381bcf4 100644 --- a/dnn/include/megdnn/dtype.h +++ b/dnn/include/megdnn/dtype.h @@ -311,11 +311,11 @@ public: #ifdef __clang__ #pragma clang diagnostic pop #endif -MEGDNN_STATIC_ASSERT(sizeof(dt_byte) == 1, "bad dt_byte size"); -MEGDNN_STATIC_ASSERT(sizeof(dt_qint1) == 1, "bad dt_qint1 size"); -MEGDNN_STATIC_ASSERT(sizeof(dt_quint8) == 1, "bad dt_quint8 size"); -MEGDNN_STATIC_ASSERT(sizeof(dt_qint16) == 2, "bad dt_qint16 size"); -MEGDNN_STATIC_ASSERT(sizeof(dt_qint32) == 4, "bad dt_qint32 size"); +MEGDNN_STATIC_ASSERT(sizeof(dt_byte) == 1, "bad dt_byte size") +MEGDNN_STATIC_ASSERT(sizeof(dt_qint1) == 1, "bad dt_qint1 size") +MEGDNN_STATIC_ASSERT(sizeof(dt_quint8) == 1, "bad dt_quint8 size") +MEGDNN_STATIC_ASSERT(sizeof(dt_qint16) == 2, "bad dt_qint16 size") +MEGDNN_STATIC_ASSERT(sizeof(dt_qint32) == 4, "bad dt_qint32 size") typedef float dt_float32; typedef int32_t dt_int32; typedef int16_t dt_int16; @@ -613,7 +613,7 @@ template struct log { static MEGDNN_CONSTEXPR size_t value = log<(n >> 1)>::value + 1; #if MEGDNN_CC_HOST - MEGDNN_STATIC_ASSERT((n & (n - 1)) == 0, "only full power number can have log"); + MEGDNN_STATIC_ASSERT((n & (n - 1)) == 0, "only full power number can have log") #endif }; template <> diff --git a/dnn/include/megdnn/thin/small_vector.h b/dnn/include/megdnn/thin/small_vector.h index b6ccf1901..80d5404a6 100644 --- a/dnn/include/megdnn/thin/small_vector.h +++ b/dnn/include/megdnn/thin/small_vector.h @@ -333,7 +333,7 @@ public: } else if (n > this->size()) { if (this->capacity() < n) this->grow(n); - for (auto it = this->end(), end = this->begin() + n; it != end; ++it) + for (iterator it = this->end(), end = this->begin() + n; it != end; ++it) new (&*it) T(); this->set_end(this->begin() + n); } diff --git a/dnn/src/common/basic_types.cpp b/dnn/src/common/basic_types.cpp index 35375ed03..be2d97696 100644 --- a/dnn/src/common/basic_types.cpp +++ b/dnn/src/common/basic_types.cpp @@ -144,7 +144,7 @@ size_t TensorShape::total_nr_elems() const { } bool TensorShape::eq_shape(const TensorShape& rhs) const { - MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code"); + MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code") if (ndim == rhs.ndim) { size_t eq = 0; switch (ndim) { @@ -379,7 +379,7 @@ bool TensorLayout::eq_layout(const TensorLayout& rhs) const { dtype == rhs.dtype, "could not compare layout on different dtypes: %s vs %s", dtype.name(), rhs.dtype.name()); - MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code"); + MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code") auto ax = [](size_t shape0, size_t shape1, ptrdiff_t stride0, ptrdiff_t stride1) { return (shape0 == shape1) & ((shape0 <= 1) | (stride0 == stride1)); diff --git 
a/dnn/src/common/elemwise_helper.cuh b/dnn/src/common/elemwise_helper.cuh index e91f92a78..e7ca6f74e 100644 --- a/dnn/src/common/elemwise_helper.cuh +++ b/dnn/src/common/elemwise_helper.cuh @@ -98,10 +98,10 @@ MEGDNN_DEVICE MEGDNN_HOST inline T round_mulh_saturate(T a, T b) { MEGDNN_STATIC_ASSERT( std::numeric_limits::digits <= 32, "Portable RMULH is not supported for integer " - "types larger than 32 bits."); + "types larger than 32 bits.") MEGDNN_STATIC_ASSERT( std::numeric_limits::is_integer, - "Input types should be integer for RMULH"); + "Input types should be integer for RMULH") bool overflow = a == b && a == DTypeTrait::min(); // TODO: This really should be // rounding_shift_right_away_from_zero, but we haven't yet found a fast way diff --git a/dnn/src/common/named_tensor.cpp b/dnn/src/common/named_tensor.cpp index 9e952475d..2b97230c1 100644 --- a/dnn/src/common/named_tensor.cpp +++ b/dnn/src/common/named_tensor.cpp @@ -185,7 +185,7 @@ NamedTensorShape::NamedTensorShape(std::initializer_list init_shape) : NamedTensorShape(SmallVector{init_shape}) {} bool NamedTensorShape::eq_shape(const NamedTensorShape& rhs) const { - MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code"); + MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code") if (ndim == rhs.ndim) { size_t eq = 0; switch (ndim) { diff --git a/dnn/src/cuda/argsort/argsort.cu b/dnn/src/cuda/argsort/argsort.cu index 25236c9fe..16326f034 100644 --- a/dnn/src/cuda/argsort/argsort.cu +++ b/dnn/src/cuda/argsort/argsort.cu @@ -16,6 +16,7 @@ #include "src/cuda/cub/device/device_radix_sort.cuh" #include "src/cuda/cub/device/device_segmented_radix_sort.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn; using namespace cuda; @@ -175,4 +176,5 @@ INST_CUB_SORT(uint64_t) #undef INST_FORWARD } // namespace cuda } // namespace megdnn +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: ft=cuda syntax=cuda.doxygen diff --git a/dnn/src/cuda/checksum/kern.cu b/dnn/src/cuda/checksum/kern.cu index 8edc8a4d0..81d5b433e 100644 --- a/dnn/src/cuda/checksum/kern.cu +++ b/dnn/src/cuda/checksum/kern.cu @@ -10,6 +10,7 @@ */ #include "./kern.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/reduce_helper.cuh" #include "src/cuda/utils.cuh" @@ -63,4 +64,5 @@ void megdnn::cuda::checksum::calc( size_t megdnn::cuda::checksum::get_workspace_in_bytes(size_t nr_elem) { return get_reduce_workspace_in_bytes(1, nr_elem, 1); } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: ft=cpp syntax=cpp.doxygen diff --git a/dnn/src/cuda/conv_bias/chanwise/kern.cuh b/dnn/src/cuda/conv_bias/chanwise/kern.cuh index 58d239e8c..15cb5ec59 100644 --- a/dnn/src/cuda/conv_bias/chanwise/kern.cuh +++ b/dnn/src/cuda/conv_bias/chanwise/kern.cuh @@ -25,13 +25,13 @@ namespace conv_bias { namespace chanwise { struct Param { - uint32_t batch, src_chl, src_h, src_w, chl_mul, flt_h, flt_w, out_h, out_w, pad_h, - pad_w, stride_h, stride_w, dilation_h, dilation_w; + int batch, src_chl, src_h, src_w, chl_mul, flt_h, flt_w, out_h, out_w, pad_h, pad_w, + stride_h, stride_w, dilation_h, dilation_w; bool is_compute_deafult; #if MEGDNN_CC_HOST static Param from_fwd_args( const BiasForwardSizeArgs& args, bool is_compute_deafult_ = true) { -#define U(v) static_cast(v) +#define U(v) static_cast(v) auto&& src = args.src_layout->shape; auto&& dst = args.dst_layout->shape; auto&& fm = args.filter_meta; diff --git a/dnn/src/cuda/conv_bias/matmul/inplace_matmul_impl.cu 
b/dnn/src/cuda/conv_bias/matmul/inplace_matmul_impl.cu index af6c1860d..be44c4491 100644 --- a/dnn/src/cuda/conv_bias/matmul/inplace_matmul_impl.cu +++ b/dnn/src/cuda/conv_bias/matmul/inplace_matmul_impl.cu @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #include "src/cuda/conv_bias/matmul/inplace_matmul_impl.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/utils.cuh" using namespace megdnn; @@ -386,4 +387,5 @@ void conv_bias::exec_inplace_matmul_fwd( after_kernel_launch(); } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/conv_bias/quint4x4x32_wmma/reduce_with_scale_data.cu b/dnn/src/cuda/conv_bias/quint4x4x32_wmma/reduce_with_scale_data.cu index 6041f267b..c436eb47e 100644 --- a/dnn/src/cuda/conv_bias/quint4x4x32_wmma/reduce_with_scale_data.cu +++ b/dnn/src/cuda/conv_bias/quint4x4x32_wmma/reduce_with_scale_data.cu @@ -37,6 +37,7 @@ #include "./reduce_with_scale_data.cuh" #include "./wmma_conv_integer_u4.cuh" #include "src/cuda/cub/util_ptx.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn; using namespace cuda; @@ -50,7 +51,7 @@ template < struct TileCounter { MEGDNN_STATIC_ASSERT( thread_blk_x % WARP_SIZE == 0, - "thread block size in dim x not divided by warpSize"); + "thread block size in dim x not divided by warpSize") static const size_t spatial_tile_x = thread_blk_x * pixels_per_thread_x; static const size_t spatial_tile_y = thread_blk_y * pixels_per_thread_y; static const size_t global_load_tile_x = @@ -678,4 +679,5 @@ void megdnn::cuda::do_dispatch_reduce_with_scale_data_u4( after_kernel_launch(); } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: ft=cpp syntax=cuda.doxygen diff --git a/dnn/src/cuda/conv_bias/reduce_filter.cu b/dnn/src/cuda/conv_bias/reduce_filter.cu index 779c3184e..28d027e3c 100644 --- a/dnn/src/cuda/conv_bias/reduce_filter.cu +++ b/dnn/src/cuda/conv_bias/reduce_filter.cu @@ -40,6 +40,7 @@ #include "src/cuda/utils.cuh" #include "src/cuda/integer_subbyte_utils.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/reduce_helper.cuh" using namespace megdnn; @@ -163,4 +164,5 @@ size_t megdnn::cuda::do_dispatch_reduce_workspace_in_bytes( return get_reduce_workspace_in_bytes>(A, B, C); } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: ft=cpp syntax=cuda.doxygen diff --git a/dnn/src/cuda/convolution3d/backward_filter/inplace_matmul_impl.cu b/dnn/src/cuda/convolution3d/backward_filter/inplace_matmul_impl.cu index 0f06a3fe5..9053adec0 100644 --- a/dnn/src/cuda/convolution3d/backward_filter/inplace_matmul_impl.cu +++ b/dnn/src/cuda/convolution3d/backward_filter/inplace_matmul_impl.cu @@ -11,6 +11,7 @@ #include #include #include "./inplace_matmul_impl.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/utils.cuh" using namespace megdnn; using namespace cuda; @@ -429,4 +430,5 @@ void convolution3d::exec_inplace_matmul_bwd_filter( after_kernel_launch(); } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/convolution3d/forward/inplace_matmul_impl.cu b/dnn/src/cuda/convolution3d/forward/inplace_matmul_impl.cu index 4fc447473..56b9cd747 100644 --- a/dnn/src/cuda/convolution3d/forward/inplace_matmul_impl.cu +++ b/dnn/src/cuda/convolution3d/forward/inplace_matmul_impl.cu @@ -11,6 +11,7 @@ #include #include #include 
"./inplace_matmul_impl.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/utils.cuh" using namespace megdnn; using namespace cuda; @@ -401,4 +402,5 @@ void convolution3d::exec_inplace_matmul_fwd( after_kernel_launch(); } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/convolution_helper/config.cuh b/dnn/src/cuda/convolution_helper/config.cuh index 35218ba1a..6163a357a 100644 --- a/dnn/src/cuda/convolution_helper/config.cuh +++ b/dnn/src/cuda/convolution_helper/config.cuh @@ -54,9 +54,9 @@ struct RegBlockConfig { static int constexpr reg_n = reg_n_; static int constexpr reg_k = reg_k_; MEGDNN_STATIC_ASSERT( - reg_m % pack_size == 0, "reg_m must be a multiple of pack_size"); + reg_m % pack_size == 0, "reg_m must be a multiple of pack_size") MEGDNN_STATIC_ASSERT( - reg_k % pack_size == 0, "reg_k must be a multiple of pack_size"); + reg_k % pack_size == 0, "reg_k must be a multiple of pack_size") static int constexpr reg_k_packed = reg_k / pack_size; static int constexpr reg_m_packed = reg_m / pack_size; static int constexpr reg_width = reg_width_; diff --git a/dnn/src/cuda/convolution_helper/conv_trait/ibatch_conv_trait.cuh b/dnn/src/cuda/convolution_helper/conv_trait/ibatch_conv_trait.cuh index c67bb20df..f4c7c6a87 100644 --- a/dnn/src/cuda/convolution_helper/conv_trait/ibatch_conv_trait.cuh +++ b/dnn/src/cuda/convolution_helper/conv_trait/ibatch_conv_trait.cuh @@ -57,7 +57,7 @@ namespace convolution { using KernLayout = _kern_layout; \ using OutputLayout = _output_layout; \ using Param = _conv_param; \ - static constexpr bool check_bounds = check_bounds_; + static constexpr bool check_bounds = check_bounds_ #define MEGDNN_COMMA , template < @@ -80,7 +80,7 @@ struct IBatchConvTrait_f1x1s1x1 { static int constexpr block_tile_batch = RegBlockConfig::reg_n; MEGDNN_STATIC_ASSERT( block_tile_batch == 1, - "this algorithm does not unroll on batch dimension"); + "this algorithm does not unroll on batch dimension") static int constexpr block_tile_out_height_width = RegBlockConfig::reg_width * ThreadConfig::nr_thread_x; static int constexpr block_tile_in_channel = RegBlockConfig::reg_k; @@ -162,7 +162,7 @@ struct IBatchConvTrait { static int constexpr block_tile_batch = RegBlockConfig::reg_n; MEGDNN_STATIC_ASSERT( block_tile_batch == 1, - "this algorithm does not unroll on batch dimension"); + "this algorithm does not unroll on batch dimension") static int constexpr block_tile_out_height_width = RegBlockConfig::reg_width * ThreadConfig::nr_thread_x; static int constexpr block_tile_in_channel = RegBlockConfig::reg_k; diff --git a/dnn/src/cuda/convolution_helper/conv_trait/iconv_imma_trait.cuh b/dnn/src/cuda/convolution_helper/conv_trait/iconv_imma_trait.cuh index 0481e7394..443151779 100644 --- a/dnn/src/cuda/convolution_helper/conv_trait/iconv_imma_trait.cuh +++ b/dnn/src/cuda/convolution_helper/conv_trait/iconv_imma_trait.cuh @@ -54,7 +54,7 @@ namespace convolution { using KernLayout = _kern_layout; \ using OutputLayout = _output_layout; \ using Param = _conv_param; \ - static constexpr bool check_bounds = check_bounds_; + static constexpr bool check_bounds = check_bounds_ #define MEGDNN_COMMA , template < @@ -183,7 +183,7 @@ struct IConvIMMATraitReorderFilter { check_bounds MEGDNN_COMMA IMMAConfig MEGDNN_COMMA WarpTileConfig MEGDNN_COMMA ThreadConfig>::src_dtype MEGDNN_COMMA src_dtype>::value == true, - "data type of input tensor should be int8_t"); + "data type of input tensor should be int8_t") using 
DataTileCount = typename IConvIMMATrait< check_bounds, IMMAConfig, WarpTileConfig, ThreadConfig>::DataTileCount; struct FilterTileCount { @@ -284,7 +284,7 @@ struct IConvIMMATraitUnrollWidth { check_bounds MEGDNN_COMMA IMMAConfig MEGDNN_COMMA WarpTileConfig MEGDNN_COMMA ThreadConfig>:: filter_dtype MEGDNN_COMMA filter_dtype>::value == true, - "data type of filter tensor should be int8_t"); + "data type of filter tensor should be int8_t") using FilterTileCount = typename IConvIMMATraitReorderFilter< check_bounds, IMMAConfig, WarpTileConfig, ThreadConfig>::FilterTileCount; using BlockTileIterator = @@ -345,7 +345,7 @@ struct IConvIMMATraitUnrollWidthV2 { MEGDNN_STATIC_ASSERT( WarpTileConfig::warp_tile_k == 1, "kernel unrolling along width axis assumes tile k " - "in warp-level must be 1"); + "in warp-level must be 1") using copy_t = int4; using smem_storage_dtype = smem_storage_dtype; static int constexpr load_width = sizeof(copy_t) / sizeof(smem_storage_dtype); @@ -388,7 +388,7 @@ struct IConvIMMATraitUnrollWidthV2 { MEGDNN_STATIC_ASSERT( WarpTileConfig::warp_tile_k == 1, "kernel unrolling along width axis assumes tile k " - "in warp-level must be 1"); + "in warp-level must be 1") using copy_t = int4; using smem_storage_dtype = smem_storage_dtype; static int constexpr load_width = sizeof(copy_t) / sizeof(smem_storage_dtype); diff --git a/dnn/src/cuda/convolution_helper/conv_trait/iconv_trait.cuh b/dnn/src/cuda/convolution_helper/conv_trait/iconv_trait.cuh index d4c52724e..5adcbdd9b 100644 --- a/dnn/src/cuda/convolution_helper/conv_trait/iconv_trait.cuh +++ b/dnn/src/cuda/convolution_helper/conv_trait/iconv_trait.cuh @@ -54,7 +54,7 @@ namespace convolution { using KernLayout = _kern_layout; \ using OutputLayout = _output_layout; \ using Param = _conv_param; \ - static constexpr bool check_bounds = check_bounds_; + static constexpr bool check_bounds = check_bounds_ #define MEGDNN_COMMA , template < @@ -175,7 +175,7 @@ struct IConvTraitUnrollWidth { check_bounds MEGDNN_COMMA ldg_dtype MEGDNN_COMMA RegBlockConfig MEGDNN_COMMA ThreadConfig>:: filter_dtype MEGDNN_COMMA filter_dtype>::value == true, - "data type of filter tensor should be int8_t"); + "data type of filter tensor should be int8_t") using FilterTileCount = typename IConvTrait< check_bounds, ldg_dtype, RegBlockConfig, ThreadConfig>::FilterTileCount; using BlockTileIterator = diff --git a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_cixhw.cuh b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_cixhw.cuh index 554202fe4..881c8ab96 100644 --- a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_cixhw.cuh +++ b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_cixhw.cuh @@ -108,7 +108,7 @@ struct Global2ShareMemVisitor_CIxHW; DEF(true, Layout) copy_t reg[TileCount::reg_h][TileCount::reg_w][TileCount::reg_d]; -MEGDNN_STATIC_ASSERT(load_width == 4, "load four element from src tensor per time"); +MEGDNN_STATIC_ASSERT(load_width == 4, "load four element from src tensor per time") __device__ Global2ShareMemVisitor_CIxHW( smem_storage_dtype* smem_, const int* __restrict__ offset_) diff --git a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixn.cuh b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixn.cuh index 38ff7d6a7..d8e386c6b 100644 --- a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixn.cuh +++ 
b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixn.cuh @@ -169,7 +169,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x; copy_t reg[TileCount::reg_h][TileCount::reg_w]; MEGDNN_STATIC_ASSERT( std::is_same::value == true, - "ldg data type must be int4 for this memory visitor"); + "ldg data type must be int4 for this memory visitor") __device__ __forceinline__ void init_stride(Layout layout) { stride = layout.channel_stride / TileCount::ldg_load_width; diff --git a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwixn.cuh b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwixn.cuh index 0c44f5d52..3a3f2791c 100644 --- a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwixn.cuh +++ b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwixn.cuh @@ -65,7 +65,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x; copy_t reg[TileCount::reg_h][TileCount::reg_w][TileCount::reg_d]; MEGDNN_STATIC_ASSERT( std::is_same::value == true, - "ldg data type must be int4 for this memory visitor"); + "ldg data type must be int4 for this memory visitor") __device__ __forceinline__ void init_stride(Layout layout) { stride = layout.channel_stride / TileCount::ldg_load_width; diff --git a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwoxn.cuh b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwoxn.cuh index cb14ea429..6b55c0205 100644 --- a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwoxn.cuh +++ b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwoxn.cuh @@ -66,7 +66,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x; copy_t reg[TileCount::reg_h][TileCount::reg_w][TileCount::reg_d]; MEGDNN_STATIC_ASSERT( std::is_same::value == true, - "ldg data type must be int4 for this memory visitor"); + "ldg data type must be int4 for this memory visitor") __device__ __forceinline__ void init_stride(Layout layout) { stride = layout.channel_stride / TileCount::ldg_load_width; diff --git a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_fwxco.cuh b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_fwxco.cuh index 6b576a347..6e67bfc36 100644 --- a/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_fwxco.cuh +++ b/dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_fwxco.cuh @@ -63,7 +63,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x; copy_t reg[TileCount::reg_h][TileCount::reg_w]; MEGDNN_STATIC_ASSERT( std::is_same::value == true, - "ldg data type must be int4 for this memory visitor"); + "ldg data type must be int4 for this memory visitor") __device__ __forceinline__ void init_stride(Layout layout) { stride = layout.width_stride / TileCount::ldg_load_width; diff --git a/dnn/src/cuda/cumsum/kern_impl.cu b/dnn/src/cuda/cumsum/kern_impl.cu index d16a6c288..08dcfc03a 100644 --- a/dnn/src/cuda/cumsum/kern_impl.cu +++ b/dnn/src/cuda/cumsum/kern_impl.cu @@ -12,6 +12,7 @@ #include "./kern.cuh" #include "./kern_helper.cuh" #include "./kern_impl.cuinl" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn::cuda; using namespace cumsum::detail::cubwrap; @@ -88,4 +89,5 @@ void cumsum::get_BX_BY( BY = 512 / BX; } +#include 
"src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/elemwise/special_kerns.inl b/dnn/src/cuda/elemwise/special_kerns.inl index c5cffcdc3..160f072c6 100644 --- a/dnn/src/cuda/elemwise/special_kerns.inl +++ b/dnn/src/cuda/elemwise/special_kerns.inl @@ -125,7 +125,7 @@ struct OpCallerBinary, PVis0, PVis1> { PVis0 par0; PVis1 par1; MEGDNN_STATIC_ASSERT( - PVis0::packed_size == PVis1::packed_size, "vector size mismatch"); + PVis0::packed_size == PVis1::packed_size, "vector size mismatch") static const uint32_t packed_size = PVis0::packed_size; __device__ __forceinline__ void thread_init(uint32_t idx) { diff --git a/dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh b/dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh new file mode 100644 index 000000000..1b87671ae --- /dev/null +++ b/dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh @@ -0,0 +1,24 @@ +/** + * \file dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#ifdef __GNUC__ +#if CUDA_VERSION < 9020 +#pragma GCC diagnostic pop +#endif +#endif + +#ifdef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED +#undef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED +#else +#error "diagnostic_epilogue.h must be included after diagnostic_prologue.h" +#endif + +// vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/kernel_common/diagnostic_prologue.cuh b/dnn/src/cuda/kernel_common/diagnostic_prologue.cuh new file mode 100644 index 000000000..7ef1ced30 --- /dev/null +++ b/dnn/src/cuda/kernel_common/diagnostic_prologue.cuh @@ -0,0 +1,29 @@ +/** + * \file dnn/src/cuda/kernel_common/diagnostic_prologue.cuh + * MegEngine is Licensed under the Apache License, Version 2.0 (the "License") + * + * Copyright (c) 2014-2021 Megvii Inc. All rights reserved. + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + */ + +#ifdef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED +#error "diagnostic_prologue.h included twice without including diagnostic_epilogue.h" +#else +#define MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED +#endif + +//! see +//! https://stackoverflow.com/questions/49836419/how-to-hide-nvccs-function-was-declared-but-never-referenced-warnings +//! for more details. +#ifdef __GNUC__ +#if CUDA_VERSION < 9020 +#pragma GCC diagnostic push +#pragma diag_suppress 177 // suppress "function was declared but never referenced + // warning" +#endif +#endif + +// vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/local_share/backward_data/local_share_bwd_data_f32_implicit_gemm.cu b/dnn/src/cuda/local_share/backward_data/local_share_bwd_data_f32_implicit_gemm.cu index e58bc7bed..19dd3e1f8 100644 --- a/dnn/src/cuda/local_share/backward_data/local_share_bwd_data_f32_implicit_gemm.cu +++ b/dnn/src/cuda/local_share/backward_data/local_share_bwd_data_f32_implicit_gemm.cu @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ #include "./local_share_bwd_data.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn; using namespace cuda; @@ -580,4 +581,5 @@ void megdnn::cuda::local_share_bwd_data::_do_local_share_bwd_data_implicit_gemm( } } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cuda.doxygen diff --git a/dnn/src/cuda/local_share/backward_filter/local_share_bwd_filter_f32_implicit_gemm.cu b/dnn/src/cuda/local_share/backward_filter/local_share_bwd_filter_f32_implicit_gemm.cu index 36bfab579..5bb958781 100644 --- a/dnn/src/cuda/local_share/backward_filter/local_share_bwd_filter_f32_implicit_gemm.cu +++ b/dnn/src/cuda/local_share/backward_filter/local_share_bwd_filter_f32_implicit_gemm.cu @@ -10,6 +10,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #include "./local_share_bwd_filter.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn; using namespace cuda; @@ -495,4 +496,5 @@ void megdnn::cuda::local_share_bwd_filter::_do_local_share_bwd_filter_implicit_g } } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cuda.doxygen diff --git a/dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware.cu b/dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware.cu index 0dd1a8894..60f6b454e 100644 --- a/dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware.cu +++ b/dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware.cu @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ #include "./local_share_forward.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn; using namespace cuda; @@ -54,7 +55,7 @@ struct FilterTileCount { MEGDNN_STATIC_ASSERT( smem_w % ThreadConfig::nr_thread_x == 0, - "col of share memory must be divided by nr_thread_x"); + "col of share memory must be divided by nr_thread_x") static int const reg_h = (smem_h + ThreadConfig::nr_thread_y - 1) / ThreadConfig::nr_thread_y; static int const reg_w = smem_w / ThreadConfig::nr_thread_x; @@ -1292,4 +1293,5 @@ void megdnn::cuda::local_share::_do_local_share_convolution_large_batch_size( } } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cuda.doxygen diff --git a/dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware_small_image.cu b/dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware_small_image.cu index 5fd46d85c..7578f2217 100644 --- a/dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware_small_image.cu +++ b/dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware_small_image.cu @@ -10,6 +10,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/ #include "./local_share_forward.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn; using namespace cuda; @@ -582,4 +583,5 @@ void megdnn::cuda::local_share:: } } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cuda.doxygen diff --git a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu index 8ecd6b42f..b944019f6 100644 --- a/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu +++ b/dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu @@ -79,7 +79,7 @@ struct GlobalToShareMemStream { GlobalToShareMemStreamConfig_::BlockSize == GlobalToShareMemStreamConfig_::CACHE_SIZE * BlockConfig_::WARPS_PER_BLOCK, - "Block size mismatch"); + "Block size mismatch") uint8_t* smem; const uint8_t* g_ptr; diff --git a/dnn/src/cuda/pooling/pooling2d_qint.cu b/dnn/src/cuda/pooling/pooling2d_qint.cu index e9da27256..2f85b1256 100644 --- a/dnn/src/cuda/pooling/pooling2d_qint.cu +++ b/dnn/src/cuda/pooling/pooling2d_qint.cu @@ -11,6 +11,7 @@ */ #include "./pooling2d_qint.cuh" #include "src/common/opr_param_defs_enumv.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/query_blocksize.cuh" using namespace megdnn; @@ -350,7 +351,7 @@ __global__ void pooling2d_device_template_nchwc( static int constexpr section = pack_byte / sizeof(ldg_type); MEGDNN_STATIC_ASSERT( ldg_width == ldg_width_assert, - "pooling2d (NCHW64) kernel must use 128bit width ldg instruction"); + "pooling2d (NCHW64) kernel must use 128bit width ldg instruction") const int c_packed = param.c / pack_size; const int batch = tid / (param.ho * param.wo * c_packed * section); const int batch_residual = tid - batch * param.ho * param.wo * c_packed * section; @@ -402,7 +403,7 @@ __global__ void pooling2d_device_template_nhwc( static int constexpr ldg_width_bytes = sizeof(ldg_type); MEGDNN_STATIC_ASSERT( ldg_width == ldg_width_assert, - "pooling2d (NHWC) kernel must ldg_width == ldg_width_assert"); + "pooling2d (NHWC) kernel must ldg_width == ldg_width_assert") const int c_packed = param.c / pack_size; const int batch = tid / (param.ho * param.wo * c_packed); const int batch_residual = tid - batch * param.ho * param.wo * c_packed; @@ -691,4 +692,6 @@ void megdnn::cuda::pooling2d::do_pooling2d_int4_nhwc( kern<<>>(d_src, d_dst, param, zero_point); after_kernel_launch(); } + +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cuda.doxygen diff --git a/dnn/src/cuda/query_blocksize_impl.cu b/dnn/src/cuda/query_blocksize_impl.cu index 86044a96b..9adb04eae 100644 --- a/dnn/src/cuda/query_blocksize_impl.cu +++ b/dnn/src/cuda/query_blocksize_impl.cu @@ -9,6 +9,7 @@ * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. */ +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/query_blocksize.cuh" #include "src/cuda/utils.cuh" @@ -51,4 +52,5 @@ LaunchConfig cuda::detail::query_launch_config_for_kernel_uncached( return ret; } +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}} diff --git a/dnn/src/cuda/relayout_format/relayout_format.cu b/dnn/src/cuda/relayout_format/relayout_format.cu index 1e3410a4c..998861085 100644 --- a/dnn/src/cuda/relayout_format/relayout_format.cu +++ b/dnn/src/cuda/relayout_format/relayout_format.cu @@ -10,6 +10,7 @@ * implied. 
*/ +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/query_blocksize.cuh" #include "src/cuda/relayout_format/relayout_format_kern.cuh" @@ -104,7 +105,7 @@ __global__ void kern_nchw_nchwx( MEGDNN_STATIC_ASSERT( std::is_same::value, "Currently this kernel only support accessing tensor " - "src and dst in same data type."); + "src and dst in same data type.") n_stride_src /= size_src_type; ic_stride /= size_src_type; n_stride_dst /= size_dst_type; @@ -585,3 +586,5 @@ void relayout_format::relayout_format_cuda_nchw_nchw4_weight( ic_stride, oc_stride_dst, group_stride_src, group_stride_dst, 0, {}); after_kernel_launch(); } + +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" diff --git a/dnn/src/cuda/relayout_format/relayout_format_kern.cuh b/dnn/src/cuda/relayout_format/relayout_format_kern.cuh index 889130173..7e37cb36c 100644 --- a/dnn/src/cuda/relayout_format/relayout_format_kern.cuh +++ b/dnn/src/cuda/relayout_format/relayout_format_kern.cuh @@ -478,12 +478,12 @@ struct RelayoutProblem { using Transpose = Transpose_; using CudaPostProcess = CudaPostProcess_; MEGDNN_STATIC_ASSERT( - SrcIterator::chan_blk == DstIterator::chan_blk, "channel block mismatch"); + SrcIterator::chan_blk == DstIterator::chan_blk, "channel block mismatch") MEGDNN_STATIC_ASSERT( - SrcIterator::width == DstIterator::width, "width block mismatch"); + SrcIterator::width == DstIterator::width, "width block mismatch") MEGDNN_STATIC_ASSERT( SrcIterator::size_nbits == DstIterator::size_nbits, - "size in bits of elements mismatch"); + "size in bits of elements mismatch") static constexpr int pack_chan = SrcIterator::chan_blk; static constexpr int pack_width = SrcIterator::width; using DnnSrcType = typename CudaPostProcess::SrcType; diff --git a/dnn/src/cuda/relayout_format/relayout_format_utils.cuh b/dnn/src/cuda/relayout_format/relayout_format_utils.cuh index daa2cf8f6..5ebba2191 100644 --- a/dnn/src/cuda/relayout_format/relayout_format_utils.cuh +++ b/dnn/src/cuda/relayout_format/relayout_format_utils.cuh @@ -80,7 +80,7 @@ inline __device__ DstType make_zero_pad(const uint8_t zero_point) { template <> inline __device__ char4 make_zero_pad(const uint8_t zero_point) { - char izp = reinterpret_cast(zero_point); + signed char izp = reinterpret_cast(zero_point); return {izp, izp, izp, izp}; } diff --git a/dnn/src/cuda/resize/forward.cu b/dnn/src/cuda/resize/forward.cu index deeb21f93..71907a5c2 100644 --- a/dnn/src/cuda/resize/forward.cu +++ b/dnn/src/cuda/resize/forward.cu @@ -16,6 +16,7 @@ #include "src/common/resize.cuh" #include "src/cuda/cv/kernel_common.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn; using namespace cuda; @@ -319,4 +320,5 @@ INST(int8_t); } // namespace cuda } // namespace megdnn +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/type_cvt/kern.cu b/dnn/src/cuda/type_cvt/kern.cu index 94ec995ba..007fd1bef 100644 --- a/dnn/src/cuda/type_cvt/kern.cu +++ b/dnn/src/cuda/type_cvt/kern.cu @@ -13,6 +13,7 @@ #include "megdnn/dtype.h" #include "src/cuda/elemwise_helper.cuh" #include "src/cuda/elemwise_helper_q4.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" using namespace megdnn; using namespace cuda; @@ -410,4 +411,5 @@ MEGDNN_FOREACH_COMPUTING_CTYPE(INST_SRC_NORMAL_LOWBIT) } // namespace cuda } // namespace megdnn +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/warp_affine/warp_affine.cu 
b/dnn/src/cuda/warp_affine/warp_affine.cu index c52f86e94..d13686ce9 100644 --- a/dnn/src/cuda/warp_affine/warp_affine.cu +++ b/dnn/src/cuda/warp_affine/warp_affine.cu @@ -12,6 +12,7 @@ #include #include "src/common/rounding_converter.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/utils.cuh" #include "src/cuda/warp_affine/common.cuh" @@ -288,4 +289,5 @@ INST(int8_t) } // namespace cuda } // namespace megdnn +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cpp.doxygen diff --git a/dnn/src/cuda/warp_perspective/forward.cu b/dnn/src/cuda/warp_perspective/forward.cu index b6c5e8cf8..2d251f094 100644 --- a/dnn/src/cuda/warp_perspective/forward.cu +++ b/dnn/src/cuda/warp_perspective/forward.cu @@ -16,6 +16,7 @@ #include "src/common/rounding_converter.cuh" #include "src/cuda/error_info.cuh" #include "src/cuda/integer_subbyte_utils.cuh" +#include "src/cuda/kernel_common/diagnostic_prologue.cuh" #include "src/cuda/utils.cuh" #include "src/cuda/warp_perspective/common.cuh" @@ -1760,4 +1761,5 @@ INST(dt_quint8, uint8_t, float) } // namespace cuda } // namespace megdnn +#include "src/cuda/kernel_common/diagnostic_epilogue.cuh" // vim: syntax=cpp.doxygen diff --git a/dnn/src/naive/relayout_format/opr_impl.cpp b/dnn/src/naive/relayout_format/opr_impl.cpp index 0f2f91002..0831bb7a7 100644 --- a/dnn/src/naive/relayout_format/opr_impl.cpp +++ b/dnn/src/naive/relayout_format/opr_impl.cpp @@ -51,7 +51,7 @@ void lowbit_recursive_cp( MEGDNN_STATIC_ASSERT( !(8_z % size_nbits), "size in bits of lowbit data type can only be 1, 2, 4 " - "or 8"); + "or 8") if (idx < (src.layout.ndim - 1)) { for (size_t i = 0; i < src.layout[idx]; ++i) { lowbit_recursive_cp( diff --git a/src/opr/test/basic_arith/elemwise.cpp b/src/opr/test/basic_arith/elemwise.cpp index 2ed8d8bd2..b602c1d51 100644 --- a/src/opr/test/basic_arith/elemwise.cpp +++ b/src/opr/test/basic_arith/elemwise.cpp @@ -130,10 +130,10 @@ T do_round_mulh_saturate(T a, T b) { MEGDNN_STATIC_ASSERT( std::numeric_limits::digits <= 32, "Portable RMULH is not supported for integer " - "types larger than 32 bits."); + "types larger than 32 bits.") MEGDNN_STATIC_ASSERT( std::numeric_limits::is_integer, - "Input types should be integer for RMULH"); + "Input types should be integer for RMULH") bool overflow = a == b && a == DTypeTrait::min(); // TODO: This really should be // rounding_shift_right_away_from_zero, but we haven't yet found a fast -- GitLab
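Two warnings are addressed by this patch. The "extra ';' ignored" warning on cuda9.0/9.1 comes from macro expansions that leave a stray semicolon behind: the patch moves the terminating ';' of MEGDNN_STATIC_ASSERT into the macro itself (so every call site in dnn/ drops its own), and conversely removes the trailing ';' from the DEF() trait macros so their call sites supply it. The "declared but never referenced" warning (nvcc diagnostic 177) is handled by the new diagnostic_prologue.cuh / diagnostic_epilogue.cuh pair, which each affected .cu file now uses to bracket its contents: on GCC hosts building with CUDA older than 9.2 the prologue pushes the diagnostic state and suppresses 177, and the epilogue pops it again and verifies that the two headers are included as a pair (on newer CUDA or non-GCC hosts they reduce to that pairing check, so the wrapping is harmless). The snippet below is a minimal sketch of the intended usage in a kernel source, not code taken from the diff; the MEGDNN_STATIC_ASSERT body is copied from the patched arch.h, while unused_device_helper and copy_kernel are made-up placeholders.

// sketch.cu -- illustrative only, assuming the megdnn source tree on the include path
// Copied from dnn/include/megdnn/arch.h after this patch: the macro supplies the
// trailing ';' itself, so call sites are written without one.
#define MEGDNN_STATIC_ASSERT(cond, msg) static_assert(cond, msg);

MEGDNN_STATIC_ASSERT(sizeof(int) == 4, "bad int size")  // note: no trailing ';'

// Bracket the translation unit with the new headers, exactly as the patched
// kernel sources do.
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"

// A static helper that some instantiations never reference; without the prologue,
// nvcc on CUDA 9.0/9.1 would emit warning 177 ("declared but never referenced") for it.
static __device__ int unused_device_helper(int x) {
    return x + 1;
}

// An ordinary kernel, unaffected by the suppression.
__global__ void copy_kernel(const int* src, int* dst, int n) {
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = src[i];
}

#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"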