Commit 14e9ad62 authored by Megvii Engine Team

fix(megdnn): fix declared-but-never-referenced and extra-';'-ignored warnings emitted on cuda9.0~cuda9.1

GitOrigin-RevId: f6db42e3958499c214bf3fc6f15ed7e8d8321da3
Parent 4c0bff1d
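
The core of this change is in the MEGDNN_STATIC_ASSERT macro: the trailing semicolon now lives inside the macro (#define MEGDNN_STATIC_ASSERT(cond, msg) static_assert(cond, msg);), so every call site throughout the diff drops its own ';'. Presumably, on build configurations where the macro is compiled out (for example device passes on old nvcc, which are not shown in this diff), a call site that kept its own semicolon would otherwise leave a stray ';' that cuda9.0/9.1 reports as "extra ';' ignored". A minimal sketch of the resulting call-site convention, using a hypothetical DemoSizes struct purely for illustration:

#define MEGDNN_STATIC_ASSERT(cond, msg) static_assert(cond, msg);

struct DemoSizes {  // hypothetical example type, not part of this commit
    int payload[4];
    // no trailing ';' at the call site: the macro already supplies it (or
    // expands to nothing where the assert is compiled out), so nvcc never
    // sees a stray semicolon at class or namespace scope
    MEGDNN_STATIC_ASSERT(sizeof(int) == 4, "bad int size")
};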
......@@ -36,15 +36,15 @@
#define MEGDNN_ALWAYS_INLINE inline __attribute__((__always_inline__))
#endif
#define MEGDNN_DEPRECATED __attribute__((deprecated))
#define MEGDNN_PACKED __attribute__((packed))
#define MEGDNN_CONSTEXPR constexpr
#define MEGDNN_NOEXCEPT noexcept
#define MEGDNN_STATIC_ASSERT static_assert
#define MEGDNN_FINAL final
#define MEGDNN_NORETURN __attribute__((noreturn))
#define MEGDNN_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#define MEGDNN_ATTRIBUTE_TARGET(simd) __attribute__((target(simd)))
#define MEGDNN_DEPRECATED __attribute__((deprecated))
#define MEGDNN_PACKED __attribute__((packed))
#define MEGDNN_CONSTEXPR constexpr
#define MEGDNN_NOEXCEPT noexcept
#define MEGDNN_STATIC_ASSERT(cond, msg) static_assert(cond, msg);
#define MEGDNN_FINAL final
#define MEGDNN_NORETURN __attribute__((noreturn))
#define MEGDNN_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#define MEGDNN_ATTRIBUTE_TARGET(simd) __attribute__((target(simd)))
#if defined(__clang_major__) && (__clang_major__ >= 7)
#define MEGDNN_LAMBDA_ATTRIBUTE_TARGET(simd) __attribute__((target(simd)))
#else
......@@ -64,10 +64,10 @@
#define MEGDNN_DEPRECATED
#define MEGDNN_PACKED
#define MEGDNN_CONSTEXPR constexpr
#define MEGDNN_NOEXCEPT noexcept
#define MEGDNN_STATIC_ASSERT static_assert
#define MEGDNN_FINAL final
#define MEGDNN_CONSTEXPR constexpr
#define MEGDNN_NOEXCEPT noexcept
#define MEGDNN_STATIC_ASSERT(cond, msg) static_assert(cond, msg);
#define MEGDNN_FINAL final
#if defined(_MSC_VER)
#define MEGDNN_NORETURN __declspec(noreturn)
......
......@@ -311,11 +311,11 @@ public:
#ifdef __clang__
#pragma clang diagnostic pop
#endif
MEGDNN_STATIC_ASSERT(sizeof(dt_byte) == 1, "bad dt_byte size");
MEGDNN_STATIC_ASSERT(sizeof(dt_qint1) == 1, "bad dt_qint1 size");
MEGDNN_STATIC_ASSERT(sizeof(dt_quint8) == 1, "bad dt_quint8 size");
MEGDNN_STATIC_ASSERT(sizeof(dt_qint16) == 2, "bad dt_qint16 size");
MEGDNN_STATIC_ASSERT(sizeof(dt_qint32) == 4, "bad dt_qint32 size");
MEGDNN_STATIC_ASSERT(sizeof(dt_byte) == 1, "bad dt_byte size")
MEGDNN_STATIC_ASSERT(sizeof(dt_qint1) == 1, "bad dt_qint1 size")
MEGDNN_STATIC_ASSERT(sizeof(dt_quint8) == 1, "bad dt_quint8 size")
MEGDNN_STATIC_ASSERT(sizeof(dt_qint16) == 2, "bad dt_qint16 size")
MEGDNN_STATIC_ASSERT(sizeof(dt_qint32) == 4, "bad dt_qint32 size")
typedef float dt_float32;
typedef int32_t dt_int32;
typedef int16_t dt_int16;
......@@ -613,7 +613,7 @@ template <uint16_t n>
struct log {
static MEGDNN_CONSTEXPR size_t value = log<(n >> 1)>::value + 1;
#if MEGDNN_CC_HOST
MEGDNN_STATIC_ASSERT((n & (n - 1)) == 0, "only full power number can have log");
MEGDNN_STATIC_ASSERT((n & (n - 1)) == 0, "only full power number can have log")
#endif
};
template <>
......
......@@ -333,7 +333,7 @@ public:
} else if (n > this->size()) {
if (this->capacity() < n)
this->grow(n);
for (auto it = this->end(), end = this->begin() + n; it != end; ++it)
for (iterator it = this->end(), end = this->begin() + n; it != end; ++it)
new (&*it) T();
this->set_end(this->begin() + n);
}
......
......@@ -144,7 +144,7 @@ size_t TensorShape::total_nr_elems() const {
}
bool TensorShape::eq_shape(const TensorShape& rhs) const {
MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code");
MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code")
if (ndim == rhs.ndim) {
size_t eq = 0;
switch (ndim) {
......@@ -379,7 +379,7 @@ bool TensorLayout::eq_layout(const TensorLayout& rhs) const {
dtype == rhs.dtype,
"could not compare layout on different dtypes: %s vs %s", dtype.name(),
rhs.dtype.name());
MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code");
MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code")
auto ax = [](size_t shape0, size_t shape1, ptrdiff_t stride0, ptrdiff_t stride1) {
return (shape0 == shape1) & ((shape0 <= 1) | (stride0 == stride1));
......
......@@ -98,10 +98,10 @@ MEGDNN_DEVICE MEGDNN_HOST inline T round_mulh_saturate(T a, T b) {
MEGDNN_STATIC_ASSERT(
std::numeric_limits<T>::digits <= 32,
"Portable RMULH is not supported for integer "
"types larger than 32 bits.");
"types larger than 32 bits.")
MEGDNN_STATIC_ASSERT(
std::numeric_limits<T>::is_integer,
"Input types should be integer for RMULH");
"Input types should be integer for RMULH")
bool overflow = a == b && a == DTypeTrait<T>::min();
// TODO: This really should be
// rounding_shift_right_away_from_zero, but we haven't yet found a fast way
......
......@@ -185,7 +185,7 @@ NamedTensorShape::NamedTensorShape(std::initializer_list<Dimension> init_shape)
: NamedTensorShape(SmallVector<Dimension>{init_shape}) {}
bool NamedTensorShape::eq_shape(const NamedTensorShape& rhs) const {
MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code");
MEGDNN_STATIC_ASSERT(MAX_NDIM == 7, "please update the code")
if (ndim == rhs.ndim) {
size_t eq = 0;
switch (ndim) {
......
......@@ -16,6 +16,7 @@
#include "src/cuda/cub/device/device_radix_sort.cuh"
#include "src/cuda/cub/device/device_segmented_radix_sort.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -175,4 +176,5 @@ INST_CUB_SORT(uint64_t)
#undef INST_FORWARD
} // namespace cuda
} // namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cuda syntax=cuda.doxygen
......@@ -10,6 +10,7 @@
*/
#include "./kern.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/reduce_helper.cuh"
#include "src/cuda/utils.cuh"
......@@ -63,4 +64,5 @@ void megdnn::cuda::checksum::calc(
size_t megdnn::cuda::checksum::get_workspace_in_bytes(size_t nr_elem) {
return get_reduce_workspace_in_bytes<ChecksumOp>(1, nr_elem, 1);
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cpp syntax=cpp.doxygen
......@@ -25,13 +25,13 @@ namespace conv_bias {
namespace chanwise {
struct Param {
uint32_t batch, src_chl, src_h, src_w, chl_mul, flt_h, flt_w, out_h, out_w, pad_h,
pad_w, stride_h, stride_w, dilation_h, dilation_w;
int batch, src_chl, src_h, src_w, chl_mul, flt_h, flt_w, out_h, out_w, pad_h, pad_w,
stride_h, stride_w, dilation_h, dilation_w;
bool is_compute_deafult;
#if MEGDNN_CC_HOST
static Param from_fwd_args(
const BiasForwardSizeArgs& args, bool is_compute_deafult_ = true) {
#define U(v) static_cast<uint32_t>(v)
#define U(v) static_cast<int>(v)
auto&& src = args.src_layout->shape;
auto&& dst = args.dst_layout->shape;
auto&& fm = args.filter_meta;
......
......@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/cuda/conv_bias/matmul/inplace_matmul_impl.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
using namespace megdnn;
......@@ -386,4 +387,5 @@ void conv_bias::exec_inplace_matmul_fwd(
after_kernel_launch();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
......@@ -37,6 +37,7 @@
#include "./reduce_with_scale_data.cuh"
#include "./wmma_conv_integer_u4.cuh"
#include "src/cuda/cub/util_ptx.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -50,7 +51,7 @@ template <
struct TileCounter {
MEGDNN_STATIC_ASSERT(
thread_blk_x % WARP_SIZE == 0,
"thread block size in dim x not divided by warpSize");
"thread block size in dim x not divided by warpSize")
static const size_t spatial_tile_x = thread_blk_x * pixels_per_thread_x;
static const size_t spatial_tile_y = thread_blk_y * pixels_per_thread_y;
static const size_t global_load_tile_x =
......@@ -678,4 +679,5 @@ void megdnn::cuda::do_dispatch_reduce_with_scale_data_u4(
after_kernel_launch();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cpp syntax=cuda.doxygen
......@@ -40,6 +40,7 @@
#include "src/cuda/utils.cuh"
#include "src/cuda/integer_subbyte_utils.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/reduce_helper.cuh"
using namespace megdnn;
......@@ -163,4 +164,5 @@ size_t megdnn::cuda::do_dispatch_reduce_workspace_in_bytes(
return get_reduce_workspace_in_bytes<ReduceWithScaleInt4Op<false>>(A, B, C);
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cpp syntax=cuda.doxygen
......@@ -11,6 +11,7 @@
#include <stdio.h>
#include <iostream>
#include "./inplace_matmul_impl.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -429,4 +430,5 @@ void convolution3d::exec_inplace_matmul_bwd_filter(
after_kernel_launch();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
......@@ -11,6 +11,7 @@
#include <stdio.h>
#include <iostream>
#include "./inplace_matmul_impl.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -401,4 +402,5 @@ void convolution3d::exec_inplace_matmul_fwd(
after_kernel_launch();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
......@@ -54,9 +54,9 @@ struct RegBlockConfig {
static int constexpr reg_n = reg_n_;
static int constexpr reg_k = reg_k_;
MEGDNN_STATIC_ASSERT(
reg_m % pack_size == 0, "reg_m must be a multiple of pack_size");
reg_m % pack_size == 0, "reg_m must be a multiple of pack_size")
MEGDNN_STATIC_ASSERT(
reg_k % pack_size == 0, "reg_k must be a multiple of pack_size");
reg_k % pack_size == 0, "reg_k must be a multiple of pack_size")
static int constexpr reg_k_packed = reg_k / pack_size;
static int constexpr reg_m_packed = reg_m / pack_size;
static int constexpr reg_width = reg_width_;
......
......@@ -57,7 +57,7 @@ namespace convolution {
using KernLayout = _kern_layout; \
using OutputLayout = _output_layout; \
using Param = _conv_param; \
static constexpr bool check_bounds = check_bounds_;
static constexpr bool check_bounds = check_bounds_
#define MEGDNN_COMMA ,
template <
......@@ -80,7 +80,7 @@ struct IBatchConvTrait_f1x1s1x1 {
static int constexpr block_tile_batch = RegBlockConfig::reg_n;
MEGDNN_STATIC_ASSERT(
block_tile_batch == 1,
"this algorithm does not unroll on batch dimension");
"this algorithm does not unroll on batch dimension")
static int constexpr block_tile_out_height_width =
RegBlockConfig::reg_width * ThreadConfig::nr_thread_x;
static int constexpr block_tile_in_channel = RegBlockConfig::reg_k;
......@@ -162,7 +162,7 @@ struct IBatchConvTrait {
static int constexpr block_tile_batch = RegBlockConfig::reg_n;
MEGDNN_STATIC_ASSERT(
block_tile_batch == 1,
"this algorithm does not unroll on batch dimension");
"this algorithm does not unroll on batch dimension")
static int constexpr block_tile_out_height_width =
RegBlockConfig::reg_width * ThreadConfig::nr_thread_x;
static int constexpr block_tile_in_channel = RegBlockConfig::reg_k;
......
......@@ -54,7 +54,7 @@ namespace convolution {
using KernLayout = _kern_layout; \
using OutputLayout = _output_layout; \
using Param = _conv_param; \
static constexpr bool check_bounds = check_bounds_;
static constexpr bool check_bounds = check_bounds_
#define MEGDNN_COMMA ,
template <
......@@ -183,7 +183,7 @@ struct IConvIMMATraitReorderFilter {
check_bounds MEGDNN_COMMA IMMAConfig MEGDNN_COMMA
WarpTileConfig MEGDNN_COMMA ThreadConfig>::src_dtype
MEGDNN_COMMA src_dtype>::value == true,
"data type of input tensor should be int8_t");
"data type of input tensor should be int8_t")
using DataTileCount = typename IConvIMMATrait<
check_bounds, IMMAConfig, WarpTileConfig, ThreadConfig>::DataTileCount;
struct FilterTileCount {
......@@ -284,7 +284,7 @@ struct IConvIMMATraitUnrollWidth {
check_bounds MEGDNN_COMMA IMMAConfig MEGDNN_COMMA
WarpTileConfig MEGDNN_COMMA ThreadConfig>::
filter_dtype MEGDNN_COMMA filter_dtype>::value == true,
"data type of filter tensor should be int8_t");
"data type of filter tensor should be int8_t")
using FilterTileCount = typename IConvIMMATraitReorderFilter<
check_bounds, IMMAConfig, WarpTileConfig, ThreadConfig>::FilterTileCount;
using BlockTileIterator =
......@@ -345,7 +345,7 @@ struct IConvIMMATraitUnrollWidthV2 {
MEGDNN_STATIC_ASSERT(
WarpTileConfig::warp_tile_k == 1,
"kernel unrolling along width axis assumes tile k "
"in warp-level must be 1");
"in warp-level must be 1")
using copy_t = int4;
using smem_storage_dtype = smem_storage_dtype;
static int constexpr load_width = sizeof(copy_t) / sizeof(smem_storage_dtype);
......@@ -388,7 +388,7 @@ struct IConvIMMATraitUnrollWidthV2 {
MEGDNN_STATIC_ASSERT(
WarpTileConfig::warp_tile_k == 1,
"kernel unrolling along width axis assumes tile k "
"in warp-level must be 1");
"in warp-level must be 1")
using copy_t = int4;
using smem_storage_dtype = smem_storage_dtype;
static int constexpr load_width = sizeof(copy_t) / sizeof(smem_storage_dtype);
......
......@@ -54,7 +54,7 @@ namespace convolution {
using KernLayout = _kern_layout; \
using OutputLayout = _output_layout; \
using Param = _conv_param; \
static constexpr bool check_bounds = check_bounds_;
static constexpr bool check_bounds = check_bounds_
#define MEGDNN_COMMA ,
template <
......@@ -175,7 +175,7 @@ struct IConvTraitUnrollWidth {
check_bounds MEGDNN_COMMA ldg_dtype MEGDNN_COMMA
RegBlockConfig MEGDNN_COMMA ThreadConfig>::
filter_dtype MEGDNN_COMMA filter_dtype>::value == true,
"data type of filter tensor should be int8_t");
"data type of filter tensor should be int8_t")
using FilterTileCount = typename IConvTrait<
check_bounds, ldg_dtype, RegBlockConfig, ThreadConfig>::FilterTileCount;
using BlockTileIterator =
......
......@@ -108,7 +108,7 @@ struct Global2ShareMemVisitor_CIxHW;
DEF(true, Layout<NCHW4>)
copy_t reg[TileCount::reg_h][TileCount::reg_w][TileCount::reg_d];
MEGDNN_STATIC_ASSERT(load_width == 4, "load four element from src tensor per time");
MEGDNN_STATIC_ASSERT(load_width == 4, "load four element from src tensor per time")
__device__ Global2ShareMemVisitor_CIxHW(
smem_storage_dtype* smem_, const int* __restrict__ offset_)
......
......@@ -169,7 +169,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x;
copy_t reg[TileCount::reg_h][TileCount::reg_w];
MEGDNN_STATIC_ASSERT(
std::is_same<copy_t MEGDNN_COMMA int4>::value == true,
"ldg data type must be int4 for this memory visitor");
"ldg data type must be int4 for this memory visitor")
__device__ __forceinline__ void init_stride(Layout<Format::CHWN16> layout) {
stride = layout.channel_stride / TileCount::ldg_load_width;
......
......@@ -65,7 +65,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x;
copy_t reg[TileCount::reg_h][TileCount::reg_w][TileCount::reg_d];
MEGDNN_STATIC_ASSERT(
std::is_same<copy_t MEGDNN_COMMA int4>::value == true,
"ldg data type must be int4 for this memory visitor");
"ldg data type must be int4 for this memory visitor")
__device__ __forceinline__ void init_stride(Layout<Format::CHWN4> layout) {
stride = layout.channel_stride / TileCount::ldg_load_width;
......
......@@ -66,7 +66,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x;
copy_t reg[TileCount::reg_h][TileCount::reg_w][TileCount::reg_d];
MEGDNN_STATIC_ASSERT(
std::is_same<copy_t MEGDNN_COMMA int4>::value == true,
"ldg data type must be int4 for this memory visitor");
"ldg data type must be int4 for this memory visitor")
__device__ __forceinline__ void init_stride(Layout<Format::CHWN4> layout) {
stride = layout.channel_stride / TileCount::ldg_load_width;
......
......@@ -63,7 +63,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x;
copy_t reg[TileCount::reg_h][TileCount::reg_w];
MEGDNN_STATIC_ASSERT(
std::is_same<copy_t MEGDNN_COMMA int4>::value == true,
"ldg data type must be int4 for this memory visitor");
"ldg data type must be int4 for this memory visitor")
__device__ __forceinline__ void init_stride(Layout<Format::CHWN16> layout) {
stride = layout.width_stride / TileCount::ldg_load_width;
......
......@@ -12,6 +12,7 @@
#include "./kern.cuh"
#include "./kern_helper.cuh"
#include "./kern_impl.cuinl"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn::cuda;
using namespace cumsum::detail::cubwrap;
......@@ -88,4 +89,5 @@ void cumsum::get_BX_BY(
BY = 512 / BX;
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
......@@ -125,7 +125,7 @@ struct OpCallerBinary<FuseOpWrapper<Op>, PVis0, PVis1> {
PVis0 par0;
PVis1 par1;
MEGDNN_STATIC_ASSERT(
PVis0::packed_size == PVis1::packed_size, "vector size mismatch");
PVis0::packed_size == PVis1::packed_size, "vector size mismatch")
static const uint32_t packed_size = PVis0::packed_size;
__device__ __forceinline__ void thread_init(uint32_t idx) {
......
/**
* \file dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#ifdef __GNUC__
#if CUDA_VERSION < 9020
#pragma GCC diagnostic pop
#endif
#endif
#ifdef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED
#undef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED
#else
#error "diagnostic_epilogue.h must be included after diagnostic_prologue.h"
#endif
// vim: syntax=cpp.doxygen
/**
* \file dnn/src/cuda/kernel_common/diagnostic_prologue.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#ifdef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED
#error "diagnostic_prologue.h included twice without including diagnostic_epilogue.h"
#else
#define MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED
#endif
//! see
//! https://stackoverflow.com/questions/49836419/how-to-hide-nvccs-function-was-declared-but-never-referenced-warnings
//! for more details.
#ifdef __GNUC__
#if CUDA_VERSION < 9020
#pragma GCC diagnostic push
#pragma diag_suppress 177 // suppress "function was declared but never referenced
// warning"
#endif
#endif
// vim: syntax=cpp.doxygen
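
The diagnostic_prologue.cuh / diagnostic_epilogue.cuh pair above is meant to bracket whole CUDA translation units: the prologue is included after the file's other headers, pushes the GCC diagnostic state and suppresses nvcc warning 177 ("declared but never referenced") when building with CUDA older than 9.2, and the epilogue pops that state and fails the build if the pair is mismatched. A minimal usage sketch, assuming a hypothetical source file and kernel (only the two include paths come from this commit):

// hypothetical_kernel.cu -- illustrative sketch, not part of this commit
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"  // push + diag_suppress 177 on CUDA < 9.2

namespace {
// a helper that some instantiations never reference; without the prologue,
// cuda9.0/9.1 would emit warning 177 for it
__device__ __forceinline__ int unused_helper(int x) { return x + 1; }
}  // namespace

__global__ void demo_kernel(int* dst) { dst[threadIdx.x] = threadIdx.x; }

#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"  // pop + pairing check
// vim: syntax=cuda.doxygen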
......@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./local_share_bwd_data.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -580,4 +581,5 @@ void megdnn::cuda::local_share_bwd_data::_do_local_share_bwd_data_implicit_gemm(
}
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
......@@ -10,6 +10,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./local_share_bwd_filter.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -495,4 +496,5 @@ void megdnn::cuda::local_share_bwd_filter::_do_local_share_bwd_filter_implicit_g
}
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
......@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./local_share_forward.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -54,7 +55,7 @@ struct FilterTileCount {
MEGDNN_STATIC_ASSERT(
smem_w % ThreadConfig::nr_thread_x == 0,
"col of share memory must be divided by nr_thread_x");
"col of share memory must be divided by nr_thread_x")
static int const reg_h =
(smem_h + ThreadConfig::nr_thread_y - 1) / ThreadConfig::nr_thread_y;
static int const reg_w = smem_w / ThreadConfig::nr_thread_x;
......@@ -1292,4 +1293,5 @@ void megdnn::cuda::local_share::_do_local_share_convolution_large_batch_size(
}
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
......@@ -10,6 +10,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./local_share_forward.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -582,4 +583,5 @@ void megdnn::cuda::local_share::
}
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
......@@ -79,7 +79,7 @@ struct GlobalToShareMemStream {
GlobalToShareMemStreamConfig_::BlockSize ==
GlobalToShareMemStreamConfig_::CACHE_SIZE *
BlockConfig_::WARPS_PER_BLOCK,
"Block size mismatch");
"Block size mismatch")
uint8_t* smem;
const uint8_t* g_ptr;
......
......@@ -11,6 +11,7 @@
*/
#include "./pooling2d_qint.cuh"
#include "src/common/opr_param_defs_enumv.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/query_blocksize.cuh"
using namespace megdnn;
......@@ -350,7 +351,7 @@ __global__ void pooling2d_device_template_nchwc(
static int constexpr section = pack_byte / sizeof(ldg_type);
MEGDNN_STATIC_ASSERT(
ldg_width == ldg_width_assert,
"pooling2d (NCHW64) kernel must use 128bit width ldg instruction");
"pooling2d (NCHW64) kernel must use 128bit width ldg instruction")
const int c_packed = param.c / pack_size;
const int batch = tid / (param.ho * param.wo * c_packed * section);
const int batch_residual = tid - batch * param.ho * param.wo * c_packed * section;
......@@ -402,7 +403,7 @@ __global__ void pooling2d_device_template_nhwc(
static int constexpr ldg_width_bytes = sizeof(ldg_type);
MEGDNN_STATIC_ASSERT(
ldg_width == ldg_width_assert,
"pooling2d (NHWC) kernel must ldg_width == ldg_width_assert");
"pooling2d (NHWC) kernel must ldg_width == ldg_width_assert")
const int c_packed = param.c / pack_size;
const int batch = tid / (param.ho * param.wo * c_packed);
const int batch_residual = tid - batch * param.ho * param.wo * c_packed;
......@@ -691,4 +692,6 @@ void megdnn::cuda::pooling2d::do_pooling2d_int4_nhwc(
kern<<<nr_blocks, nr_threads, 0, stream>>>(d_src, d_dst, param, zero_point);
after_kernel_launch();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
......@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/query_blocksize.cuh"
#include "src/cuda/utils.cuh"
......@@ -51,4 +52,5 @@ LaunchConfig cuda::detail::query_launch_config_for_kernel_uncached(
return ret;
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
......@@ -10,6 +10,7 @@
* implied.
*/
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/query_blocksize.cuh"
#include "src/cuda/relayout_format/relayout_format_kern.cuh"
......@@ -104,7 +105,7 @@ __global__ void kern_nchw_nchwx(
MEGDNN_STATIC_ASSERT(
std::is_same<SrcType MEGDNN_COMMA DstType>::value,
"Currently this kernel only support accessing tensor "
"src and dst in same data type.");
"src and dst in same data type.")
n_stride_src /= size_src_type;
ic_stride /= size_src_type;
n_stride_dst /= size_dst_type;
......@@ -585,3 +586,5 @@ void relayout_format::relayout_format_cuda_nchw_nchw4_weight(
ic_stride, oc_stride_dst, group_stride_src, group_stride_dst, 0, {});
after_kernel_launch();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
......@@ -478,12 +478,12 @@ struct RelayoutProblem {
using Transpose = Transpose_;
using CudaPostProcess = CudaPostProcess_;
MEGDNN_STATIC_ASSERT(
SrcIterator::chan_blk == DstIterator::chan_blk, "channel block mismatch");
SrcIterator::chan_blk == DstIterator::chan_blk, "channel block mismatch")
MEGDNN_STATIC_ASSERT(
SrcIterator::width == DstIterator::width, "width block mismatch");
SrcIterator::width == DstIterator::width, "width block mismatch")
MEGDNN_STATIC_ASSERT(
SrcIterator::size_nbits == DstIterator::size_nbits,
"size in bits of elements mismatch");
"size in bits of elements mismatch")
static constexpr int pack_chan = SrcIterator::chan_blk;
static constexpr int pack_width = SrcIterator::width;
using DnnSrcType = typename CudaPostProcess::SrcType;
......
......@@ -80,7 +80,7 @@ inline __device__ DstType make_zero_pad(const uint8_t zero_point) {
template <>
inline __device__ char4 make_zero_pad<char4>(const uint8_t zero_point) {
char izp = reinterpret_cast<const char&>(zero_point);
signed char izp = reinterpret_cast<const signed char&>(zero_point);
return {izp, izp, izp, izp};
}
......
......@@ -16,6 +16,7 @@
#include "src/common/resize.cuh"
#include "src/cuda/cv/kernel_common.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -319,4 +320,5 @@ INST(int8_t);
} // namespace cuda
} // namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
......@@ -13,6 +13,7 @@
#include "megdnn/dtype.h"
#include "src/cuda/elemwise_helper.cuh"
#include "src/cuda/elemwise_helper_q4.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using namespace megdnn;
using namespace cuda;
......@@ -410,4 +411,5 @@ MEGDNN_FOREACH_COMPUTING_CTYPE(INST_SRC_NORMAL_LOWBIT)
} // namespace cuda
} // namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
......@@ -12,6 +12,7 @@
#include <cstdio>
#include "src/common/rounding_converter.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
#include "src/cuda/warp_affine/common.cuh"
......@@ -288,4 +289,5 @@ INST(int8_t)
} // namespace cuda
} // namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
......@@ -16,6 +16,7 @@
#include "src/common/rounding_converter.cuh"
#include "src/cuda/error_info.cuh"
#include "src/cuda/integer_subbyte_utils.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
#include "src/cuda/warp_perspective/common.cuh"
......@@ -1760,4 +1761,5 @@ INST(dt_quint8, uint8_t, float)
} // namespace cuda
} // namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
......@@ -51,7 +51,7 @@ void lowbit_recursive_cp(
MEGDNN_STATIC_ASSERT(
!(8_z % size_nbits),
"size in bits of lowbit data type can only be 1, 2, 4 "
"or 8");
"or 8")
if (idx < (src.layout.ndim - 1)) {
for (size_t i = 0; i < src.layout[idx]; ++i) {
lowbit_recursive_cp<size_nbits>(
......
......@@ -130,10 +130,10 @@ T do_round_mulh_saturate(T a, T b) {
MEGDNN_STATIC_ASSERT(
std::numeric_limits<T>::digits <= 32,
"Portable RMULH is not supported for integer "
"types larger than 32 bits.");
"types larger than 32 bits.")
MEGDNN_STATIC_ASSERT(
std::numeric_limits<T>::is_integer,
"Input types should be integer for RMULH");
"Input types should be integer for RMULH")
bool overflow = a == b && a == DTypeTrait<T>::min();
// TODO: This really should be
// rounding_shift_right_away_from_zero, but we haven't yet found a fast
......