Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
14e9ad62
MegEngine
项目概览
MegEngine 天元
/
MegEngine
接近 2 年 前同步成功
通知
414
Star
4708
Fork
583
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
14e9ad62
编写于
4月 06, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(megdnn): emit define-but-not-referenced and extra-;-ignored warning on cuda9.0~cuda9.1
GitOrigin-RevId: f6db42e3958499c214bf3fc6f15ed7e8d8321da3
上级
4c0bff1d
变更
43
隐藏空白更改
内联
并排
Showing
43 changed file
with
152 addition
and
59 deletion
+152
-59
dnn/include/megdnn/arch.h
dnn/include/megdnn/arch.h
+13
-13
dnn/include/megdnn/dtype.h
dnn/include/megdnn/dtype.h
+6
-6
dnn/include/megdnn/thin/small_vector.h
dnn/include/megdnn/thin/small_vector.h
+1
-1
dnn/src/common/basic_types.cpp
dnn/src/common/basic_types.cpp
+2
-2
dnn/src/common/elemwise_helper.cuh
dnn/src/common/elemwise_helper.cuh
+2
-2
dnn/src/common/named_tensor.cpp
dnn/src/common/named_tensor.cpp
+1
-1
dnn/src/cuda/argsort/argsort.cu
dnn/src/cuda/argsort/argsort.cu
+2
-0
dnn/src/cuda/checksum/kern.cu
dnn/src/cuda/checksum/kern.cu
+2
-0
dnn/src/cuda/conv_bias/chanwise/kern.cuh
dnn/src/cuda/conv_bias/chanwise/kern.cuh
+3
-3
dnn/src/cuda/conv_bias/matmul/inplace_matmul_impl.cu
dnn/src/cuda/conv_bias/matmul/inplace_matmul_impl.cu
+2
-0
dnn/src/cuda/conv_bias/quint4x4x32_wmma/reduce_with_scale_data.cu
...cuda/conv_bias/quint4x4x32_wmma/reduce_with_scale_data.cu
+3
-1
dnn/src/cuda/conv_bias/reduce_filter.cu
dnn/src/cuda/conv_bias/reduce_filter.cu
+2
-0
dnn/src/cuda/convolution3d/backward_filter/inplace_matmul_impl.cu
...cuda/convolution3d/backward_filter/inplace_matmul_impl.cu
+2
-0
dnn/src/cuda/convolution3d/forward/inplace_matmul_impl.cu
dnn/src/cuda/convolution3d/forward/inplace_matmul_impl.cu
+2
-0
dnn/src/cuda/convolution_helper/config.cuh
dnn/src/cuda/convolution_helper/config.cuh
+2
-2
dnn/src/cuda/convolution_helper/conv_trait/ibatch_conv_trait.cuh
.../cuda/convolution_helper/conv_trait/ibatch_conv_trait.cuh
+3
-3
dnn/src/cuda/convolution_helper/conv_trait/iconv_imma_trait.cuh
...c/cuda/convolution_helper/conv_trait/iconv_imma_trait.cuh
+5
-5
dnn/src/cuda/convolution_helper/conv_trait/iconv_trait.cuh
dnn/src/cuda/convolution_helper/conv_trait/iconv_trait.cuh
+2
-2
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_cixhw.cuh
...per/global_memory_visitor/global_memory_visitor_cixhw.cuh
+1
-1
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixn.cuh
...global_memory_visitor/global_memory_visitor_imma_cixn.cuh
+1
-1
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwixn.cuh
...bal_memory_visitor/global_memory_visitor_imma_cixwixn.cuh
+1
-1
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwoxn.cuh
...bal_memory_visitor/global_memory_visitor_imma_cixwoxn.cuh
+1
-1
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_fwxco.cuh
...lobal_memory_visitor/global_memory_visitor_imma_fwxco.cuh
+1
-1
dnn/src/cuda/cumsum/kern_impl.cu
dnn/src/cuda/cumsum/kern_impl.cu
+2
-0
dnn/src/cuda/elemwise/special_kerns.inl
dnn/src/cuda/elemwise/special_kerns.inl
+1
-1
dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh
dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh
+24
-0
dnn/src/cuda/kernel_common/diagnostic_prologue.cuh
dnn/src/cuda/kernel_common/diagnostic_prologue.cuh
+29
-0
dnn/src/cuda/local_share/backward_data/local_share_bwd_data_f32_implicit_gemm.cu
...e/backward_data/local_share_bwd_data_f32_implicit_gemm.cu
+2
-0
dnn/src/cuda/local_share/backward_filter/local_share_bwd_filter_f32_implicit_gemm.cu
...ckward_filter/local_share_bwd_filter_f32_implicit_gemm.cu
+2
-0
dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware.cu
...hare/forward/local_share_fwd_chwn_f32_batch_size_aware.cu
+3
-1
dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware_small_image.cu
.../local_share_fwd_chwn_f32_batch_size_aware_small_image.cu
+2
-0
dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu
...src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu
+1
-1
dnn/src/cuda/pooling/pooling2d_qint.cu
dnn/src/cuda/pooling/pooling2d_qint.cu
+5
-2
dnn/src/cuda/query_blocksize_impl.cu
dnn/src/cuda/query_blocksize_impl.cu
+2
-0
dnn/src/cuda/relayout_format/relayout_format.cu
dnn/src/cuda/relayout_format/relayout_format.cu
+4
-1
dnn/src/cuda/relayout_format/relayout_format_kern.cuh
dnn/src/cuda/relayout_format/relayout_format_kern.cuh
+3
-3
dnn/src/cuda/relayout_format/relayout_format_utils.cuh
dnn/src/cuda/relayout_format/relayout_format_utils.cuh
+1
-1
dnn/src/cuda/resize/forward.cu
dnn/src/cuda/resize/forward.cu
+2
-0
dnn/src/cuda/type_cvt/kern.cu
dnn/src/cuda/type_cvt/kern.cu
+2
-0
dnn/src/cuda/warp_affine/warp_affine.cu
dnn/src/cuda/warp_affine/warp_affine.cu
+2
-0
dnn/src/cuda/warp_perspective/forward.cu
dnn/src/cuda/warp_perspective/forward.cu
+2
-0
dnn/src/naive/relayout_format/opr_impl.cpp
dnn/src/naive/relayout_format/opr_impl.cpp
+1
-1
src/opr/test/basic_arith/elemwise.cpp
src/opr/test/basic_arith/elemwise.cpp
+2
-2
未找到文件。
dnn/include/megdnn/arch.h
浏览文件 @
14e9ad62
...
...
@@ -36,15 +36,15 @@
#define MEGDNN_ALWAYS_INLINE inline __attribute__((__always_inline__))
#endif
#define MEGDNN_DEPRECATED __attribute__((deprecated))
#define MEGDNN_PACKED __attribute__((packed))
#define MEGDNN_CONSTEXPR constexpr
#define MEGDNN_NOEXCEPT noexcept
#define MEGDNN_STATIC_ASSERT
static_assert
#define MEGDNN_FINAL final
#define MEGDNN_NORETURN __attribute__((noreturn))
#define MEGDNN_WARN_UNUSED_RESULT __attribute__((warn_unused_result))
#define MEGDNN_ATTRIBUTE_TARGET(simd) __attribute__((target(simd)))
#define MEGDNN_DEPRECATED
__attribute__((deprecated))
#define MEGDNN_PACKED
__attribute__((packed))
#define MEGDNN_CONSTEXPR
constexpr
#define MEGDNN_NOEXCEPT
noexcept
#define MEGDNN_STATIC_ASSERT
(cond, msg) static_assert(cond, msg);
#define MEGDNN_FINAL
final
#define MEGDNN_NORETURN
__attribute__((noreturn))
#define MEGDNN_WARN_UNUSED_RESULT
__attribute__((warn_unused_result))
#define MEGDNN_ATTRIBUTE_TARGET(simd)
__attribute__((target(simd)))
#if defined(__clang_major__) && (__clang_major__ >= 7)
#define MEGDNN_LAMBDA_ATTRIBUTE_TARGET(simd) __attribute__((target(simd)))
#else
...
...
@@ -64,10 +64,10 @@
#define MEGDNN_DEPRECATED
#define MEGDNN_PACKED
#define MEGDNN_CONSTEXPR constexpr
#define MEGDNN_NOEXCEPT noexcept
#define MEGDNN_STATIC_ASSERT
static_assert
#define MEGDNN_FINAL final
#define MEGDNN_CONSTEXPR
constexpr
#define MEGDNN_NOEXCEPT
noexcept
#define MEGDNN_STATIC_ASSERT
(cond, msg) static_assert(cond, msg);
#define MEGDNN_FINAL
final
#if defined(_MSC_VER)
#define MEGDNN_NORETURN __declspec(noreturn)
...
...
dnn/include/megdnn/dtype.h
浏览文件 @
14e9ad62
...
...
@@ -311,11 +311,11 @@ public:
#ifdef __clang__
#pragma clang diagnostic pop
#endif
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_byte
)
==
1
,
"bad dt_byte size"
)
;
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_qint1
)
==
1
,
"bad dt_qint1 size"
)
;
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_quint8
)
==
1
,
"bad dt_quint8 size"
)
;
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_qint16
)
==
2
,
"bad dt_qint16 size"
)
;
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_qint32
)
==
4
,
"bad dt_qint32 size"
)
;
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_byte
)
==
1
,
"bad dt_byte size"
)
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_qint1
)
==
1
,
"bad dt_qint1 size"
)
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_quint8
)
==
1
,
"bad dt_quint8 size"
)
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_qint16
)
==
2
,
"bad dt_qint16 size"
)
MEGDNN_STATIC_ASSERT
(
sizeof
(
dt_qint32
)
==
4
,
"bad dt_qint32 size"
)
typedef
float
dt_float32
;
typedef
int32_t
dt_int32
;
typedef
int16_t
dt_int16
;
...
...
@@ -613,7 +613,7 @@ template <uint16_t n>
struct
log
{
static
MEGDNN_CONSTEXPR
size_t
value
=
log
<
(
n
>>
1
)
>::
value
+
1
;
#if MEGDNN_CC_HOST
MEGDNN_STATIC_ASSERT
((
n
&
(
n
-
1
))
==
0
,
"only full power number can have log"
)
;
MEGDNN_STATIC_ASSERT
((
n
&
(
n
-
1
))
==
0
,
"only full power number can have log"
)
#endif
};
template
<
>
...
...
dnn/include/megdnn/thin/small_vector.h
浏览文件 @
14e9ad62
...
...
@@ -333,7 +333,7 @@ public:
}
else
if
(
n
>
this
->
size
())
{
if
(
this
->
capacity
()
<
n
)
this
->
grow
(
n
);
for
(
auto
it
=
this
->
end
(),
end
=
this
->
begin
()
+
n
;
it
!=
end
;
++
it
)
for
(
iterator
it
=
this
->
end
(),
end
=
this
->
begin
()
+
n
;
it
!=
end
;
++
it
)
new
(
&*
it
)
T
();
this
->
set_end
(
this
->
begin
()
+
n
);
}
...
...
dnn/src/common/basic_types.cpp
浏览文件 @
14e9ad62
...
...
@@ -144,7 +144,7 @@ size_t TensorShape::total_nr_elems() const {
}
bool
TensorShape
::
eq_shape
(
const
TensorShape
&
rhs
)
const
{
MEGDNN_STATIC_ASSERT
(
MAX_NDIM
==
7
,
"please update the code"
)
;
MEGDNN_STATIC_ASSERT
(
MAX_NDIM
==
7
,
"please update the code"
)
if
(
ndim
==
rhs
.
ndim
)
{
size_t
eq
=
0
;
switch
(
ndim
)
{
...
...
@@ -379,7 +379,7 @@ bool TensorLayout::eq_layout(const TensorLayout& rhs) const {
dtype
==
rhs
.
dtype
,
"could not compare layout on different dtypes: %s vs %s"
,
dtype
.
name
(),
rhs
.
dtype
.
name
());
MEGDNN_STATIC_ASSERT
(
MAX_NDIM
==
7
,
"please update the code"
)
;
MEGDNN_STATIC_ASSERT
(
MAX_NDIM
==
7
,
"please update the code"
)
auto
ax
=
[](
size_t
shape0
,
size_t
shape1
,
ptrdiff_t
stride0
,
ptrdiff_t
stride1
)
{
return
(
shape0
==
shape1
)
&
((
shape0
<=
1
)
|
(
stride0
==
stride1
));
...
...
dnn/src/common/elemwise_helper.cuh
浏览文件 @
14e9ad62
...
...
@@ -98,10 +98,10 @@ MEGDNN_DEVICE MEGDNN_HOST inline T round_mulh_saturate(T a, T b) {
MEGDNN_STATIC_ASSERT
(
std
::
numeric_limits
<
T
>::
digits
<=
32
,
"Portable RMULH is not supported for integer "
"types larger than 32 bits."
)
;
"types larger than 32 bits."
)
MEGDNN_STATIC_ASSERT
(
std
::
numeric_limits
<
T
>::
is_integer
,
"Input types should be integer for RMULH"
)
;
"Input types should be integer for RMULH"
)
bool
overflow
=
a
==
b
&&
a
==
DTypeTrait
<
T
>::
min
();
// TODO: This really should be
// rounding_shift_right_away_from_zero, but we haven't yet found a fast way
...
...
dnn/src/common/named_tensor.cpp
浏览文件 @
14e9ad62
...
...
@@ -185,7 +185,7 @@ NamedTensorShape::NamedTensorShape(std::initializer_list<Dimension> init_shape)
:
NamedTensorShape
(
SmallVector
<
Dimension
>
{
init_shape
})
{}
bool
NamedTensorShape
::
eq_shape
(
const
NamedTensorShape
&
rhs
)
const
{
MEGDNN_STATIC_ASSERT
(
MAX_NDIM
==
7
,
"please update the code"
)
;
MEGDNN_STATIC_ASSERT
(
MAX_NDIM
==
7
,
"please update the code"
)
if
(
ndim
==
rhs
.
ndim
)
{
size_t
eq
=
0
;
switch
(
ndim
)
{
...
...
dnn/src/cuda/argsort/argsort.cu
浏览文件 @
14e9ad62
...
...
@@ -16,6 +16,7 @@
#include "src/cuda/cub/device/device_radix_sort.cuh"
#include "src/cuda/cub/device/device_segmented_radix_sort.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -175,4 +176,5 @@ INST_CUB_SORT(uint64_t)
#undef INST_FORWARD
}
// namespace cuda
}
// namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cuda syntax=cuda.doxygen
dnn/src/cuda/checksum/kern.cu
浏览文件 @
14e9ad62
...
...
@@ -10,6 +10,7 @@
*/
#include "./kern.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/reduce_helper.cuh"
#include "src/cuda/utils.cuh"
...
...
@@ -63,4 +64,5 @@ void megdnn::cuda::checksum::calc(
size_t
megdnn
::
cuda
::
checksum
::
get_workspace_in_bytes
(
size_t
nr_elem
)
{
return
get_reduce_workspace_in_bytes
<
ChecksumOp
>
(
1
,
nr_elem
,
1
);
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cpp syntax=cpp.doxygen
dnn/src/cuda/conv_bias/chanwise/kern.cuh
浏览文件 @
14e9ad62
...
...
@@ -25,13 +25,13 @@ namespace conv_bias {
namespace
chanwise
{
struct
Param
{
uint32_t
batch
,
src_chl
,
src_h
,
src_w
,
chl_mul
,
flt_h
,
flt_w
,
out_h
,
out_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
;
int
batch
,
src_chl
,
src_h
,
src_w
,
chl_mul
,
flt_h
,
flt_w
,
out_h
,
out_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
;
bool
is_compute_deafult
;
#if MEGDNN_CC_HOST
static
Param
from_fwd_args
(
const
BiasForwardSizeArgs
&
args
,
bool
is_compute_deafult_
=
true
)
{
#define U(v) static_cast<
uint32_
t>(v)
#define U(v) static_cast<
in
t>(v)
auto
&&
src
=
args
.
src_layout
->
shape
;
auto
&&
dst
=
args
.
dst_layout
->
shape
;
auto
&&
fm
=
args
.
filter_meta
;
...
...
dnn/src/cuda/conv_bias/matmul/inplace_matmul_impl.cu
浏览文件 @
14e9ad62
...
...
@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/cuda/conv_bias/matmul/inplace_matmul_impl.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
using
namespace
megdnn
;
...
...
@@ -386,4 +387,5 @@ void conv_bias::exec_inplace_matmul_fwd(
after_kernel_launch
();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
dnn/src/cuda/conv_bias/quint4x4x32_wmma/reduce_with_scale_data.cu
浏览文件 @
14e9ad62
...
...
@@ -37,6 +37,7 @@
#include "./reduce_with_scale_data.cuh"
#include "./wmma_conv_integer_u4.cuh"
#include "src/cuda/cub/util_ptx.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -50,7 +51,7 @@ template <
struct
TileCounter
{
MEGDNN_STATIC_ASSERT
(
thread_blk_x
%
WARP_SIZE
==
0
,
"thread block size in dim x not divided by warpSize"
)
;
"thread block size in dim x not divided by warpSize"
)
static
const
size_t
spatial_tile_x
=
thread_blk_x
*
pixels_per_thread_x
;
static
const
size_t
spatial_tile_y
=
thread_blk_y
*
pixels_per_thread_y
;
static
const
size_t
global_load_tile_x
=
...
...
@@ -678,4 +679,5 @@ void megdnn::cuda::do_dispatch_reduce_with_scale_data_u4(
after_kernel_launch
();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cpp syntax=cuda.doxygen
dnn/src/cuda/conv_bias/reduce_filter.cu
浏览文件 @
14e9ad62
...
...
@@ -40,6 +40,7 @@
#include "src/cuda/utils.cuh"
#include "src/cuda/integer_subbyte_utils.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/reduce_helper.cuh"
using
namespace
megdnn
;
...
...
@@ -163,4 +164,5 @@ size_t megdnn::cuda::do_dispatch_reduce_workspace_in_bytes(
return
get_reduce_workspace_in_bytes
<
ReduceWithScaleInt4Op
<
false
>>
(
A
,
B
,
C
);
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cpp syntax=cuda.doxygen
dnn/src/cuda/convolution3d/backward_filter/inplace_matmul_impl.cu
浏览文件 @
14e9ad62
...
...
@@ -11,6 +11,7 @@
#include <stdio.h>
#include <iostream>
#include "./inplace_matmul_impl.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -429,4 +430,5 @@ void convolution3d::exec_inplace_matmul_bwd_filter(
after_kernel_launch
();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
dnn/src/cuda/convolution3d/forward/inplace_matmul_impl.cu
浏览文件 @
14e9ad62
...
...
@@ -11,6 +11,7 @@
#include <stdio.h>
#include <iostream>
#include "./inplace_matmul_impl.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -401,4 +402,5 @@ void convolution3d::exec_inplace_matmul_fwd(
after_kernel_launch
();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
dnn/src/cuda/convolution_helper/config.cuh
浏览文件 @
14e9ad62
...
...
@@ -54,9 +54,9 @@ struct RegBlockConfig {
static
int
constexpr
reg_n
=
reg_n_
;
static
int
constexpr
reg_k
=
reg_k_
;
MEGDNN_STATIC_ASSERT
(
reg_m
%
pack_size
==
0
,
"reg_m must be a multiple of pack_size"
)
;
reg_m
%
pack_size
==
0
,
"reg_m must be a multiple of pack_size"
)
MEGDNN_STATIC_ASSERT
(
reg_k
%
pack_size
==
0
,
"reg_k must be a multiple of pack_size"
)
;
reg_k
%
pack_size
==
0
,
"reg_k must be a multiple of pack_size"
)
static
int
constexpr
reg_k_packed
=
reg_k
/
pack_size
;
static
int
constexpr
reg_m_packed
=
reg_m
/
pack_size
;
static
int
constexpr
reg_width
=
reg_width_
;
...
...
dnn/src/cuda/convolution_helper/conv_trait/ibatch_conv_trait.cuh
浏览文件 @
14e9ad62
...
...
@@ -57,7 +57,7 @@ namespace convolution {
using KernLayout = _kern_layout; \
using OutputLayout = _output_layout; \
using Param = _conv_param; \
static constexpr bool check_bounds = check_bounds_
;
static constexpr bool check_bounds = check_bounds_
#define MEGDNN_COMMA ,
template
<
...
...
@@ -80,7 +80,7 @@ struct IBatchConvTrait_f1x1s1x1 {
static
int
constexpr
block_tile_batch
=
RegBlockConfig
::
reg_n
;
MEGDNN_STATIC_ASSERT
(
block_tile_batch
==
1
,
"this algorithm does not unroll on batch dimension"
)
;
"this algorithm does not unroll on batch dimension"
)
static
int
constexpr
block_tile_out_height_width
=
RegBlockConfig
::
reg_width
*
ThreadConfig
::
nr_thread_x
;
static
int
constexpr
block_tile_in_channel
=
RegBlockConfig
::
reg_k
;
...
...
@@ -162,7 +162,7 @@ struct IBatchConvTrait {
static
int
constexpr
block_tile_batch
=
RegBlockConfig
::
reg_n
;
MEGDNN_STATIC_ASSERT
(
block_tile_batch
==
1
,
"this algorithm does not unroll on batch dimension"
)
;
"this algorithm does not unroll on batch dimension"
)
static
int
constexpr
block_tile_out_height_width
=
RegBlockConfig
::
reg_width
*
ThreadConfig
::
nr_thread_x
;
static
int
constexpr
block_tile_in_channel
=
RegBlockConfig
::
reg_k
;
...
...
dnn/src/cuda/convolution_helper/conv_trait/iconv_imma_trait.cuh
浏览文件 @
14e9ad62
...
...
@@ -54,7 +54,7 @@ namespace convolution {
using KernLayout = _kern_layout; \
using OutputLayout = _output_layout; \
using Param = _conv_param; \
static constexpr bool check_bounds = check_bounds_
;
static constexpr bool check_bounds = check_bounds_
#define MEGDNN_COMMA ,
template
<
...
...
@@ -183,7 +183,7 @@ struct IConvIMMATraitReorderFilter {
check_bounds
MEGDNN_COMMA
IMMAConfig
MEGDNN_COMMA
WarpTileConfig
MEGDNN_COMMA
ThreadConfig
>::
src_dtype
MEGDNN_COMMA
src_dtype
>::
value
==
true
,
"data type of input tensor should be int8_t"
)
;
"data type of input tensor should be int8_t"
)
using
DataTileCount
=
typename
IConvIMMATrait
<
check_bounds
,
IMMAConfig
,
WarpTileConfig
,
ThreadConfig
>::
DataTileCount
;
struct
FilterTileCount
{
...
...
@@ -284,7 +284,7 @@ struct IConvIMMATraitUnrollWidth {
check_bounds
MEGDNN_COMMA
IMMAConfig
MEGDNN_COMMA
WarpTileConfig
MEGDNN_COMMA
ThreadConfig
>::
filter_dtype
MEGDNN_COMMA
filter_dtype
>::
value
==
true
,
"data type of filter tensor should be int8_t"
)
;
"data type of filter tensor should be int8_t"
)
using
FilterTileCount
=
typename
IConvIMMATraitReorderFilter
<
check_bounds
,
IMMAConfig
,
WarpTileConfig
,
ThreadConfig
>::
FilterTileCount
;
using
BlockTileIterator
=
...
...
@@ -345,7 +345,7 @@ struct IConvIMMATraitUnrollWidthV2 {
MEGDNN_STATIC_ASSERT
(
WarpTileConfig
::
warp_tile_k
==
1
,
"kernel unrolling along width axis assumes tile k "
"in warp-level must be 1"
)
;
"in warp-level must be 1"
)
using
copy_t
=
int4
;
using
smem_storage_dtype
=
smem_storage_dtype
;
static
int
constexpr
load_width
=
sizeof
(
copy_t
)
/
sizeof
(
smem_storage_dtype
);
...
...
@@ -388,7 +388,7 @@ struct IConvIMMATraitUnrollWidthV2 {
MEGDNN_STATIC_ASSERT
(
WarpTileConfig
::
warp_tile_k
==
1
,
"kernel unrolling along width axis assumes tile k "
"in warp-level must be 1"
)
;
"in warp-level must be 1"
)
using
copy_t
=
int4
;
using
smem_storage_dtype
=
smem_storage_dtype
;
static
int
constexpr
load_width
=
sizeof
(
copy_t
)
/
sizeof
(
smem_storage_dtype
);
...
...
dnn/src/cuda/convolution_helper/conv_trait/iconv_trait.cuh
浏览文件 @
14e9ad62
...
...
@@ -54,7 +54,7 @@ namespace convolution {
using KernLayout = _kern_layout; \
using OutputLayout = _output_layout; \
using Param = _conv_param; \
static constexpr bool check_bounds = check_bounds_
;
static constexpr bool check_bounds = check_bounds_
#define MEGDNN_COMMA ,
template
<
...
...
@@ -175,7 +175,7 @@ struct IConvTraitUnrollWidth {
check_bounds
MEGDNN_COMMA
ldg_dtype
MEGDNN_COMMA
RegBlockConfig
MEGDNN_COMMA
ThreadConfig
>::
filter_dtype
MEGDNN_COMMA
filter_dtype
>::
value
==
true
,
"data type of filter tensor should be int8_t"
)
;
"data type of filter tensor should be int8_t"
)
using
FilterTileCount
=
typename
IConvTrait
<
check_bounds
,
ldg_dtype
,
RegBlockConfig
,
ThreadConfig
>::
FilterTileCount
;
using
BlockTileIterator
=
...
...
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_cixhw.cuh
浏览文件 @
14e9ad62
...
...
@@ -108,7 +108,7 @@ struct Global2ShareMemVisitor_CIxHW;
DEF
(
true
,
Layout
<
NCHW4
>
)
copy_t
reg
[
TileCount
::
reg_h
][
TileCount
::
reg_w
][
TileCount
::
reg_d
];
MEGDNN_STATIC_ASSERT
(
load_width
==
4
,
"load four element from src tensor per time"
)
;
MEGDNN_STATIC_ASSERT
(
load_width
==
4
,
"load four element from src tensor per time"
)
__device__
Global2ShareMemVisitor_CIxHW
(
smem_storage_dtype
*
smem_
,
const
int
*
__restrict__
offset_
)
...
...
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixn.cuh
浏览文件 @
14e9ad62
...
...
@@ -169,7 +169,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x;
copy_t
reg
[
TileCount
::
reg_h
][
TileCount
::
reg_w
];
MEGDNN_STATIC_ASSERT
(
std
::
is_same
<
copy_t
MEGDNN_COMMA
int4
>::
value
==
true
,
"ldg data type must be int4 for this memory visitor"
)
;
"ldg data type must be int4 for this memory visitor"
)
__device__
__forceinline__
void
init_stride
(
Layout
<
Format
::
CHWN16
>
layout
)
{
stride
=
layout
.
channel_stride
/
TileCount
::
ldg_load_width
;
...
...
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwixn.cuh
浏览文件 @
14e9ad62
...
...
@@ -65,7 +65,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x;
copy_t
reg
[
TileCount
::
reg_h
][
TileCount
::
reg_w
][
TileCount
::
reg_d
];
MEGDNN_STATIC_ASSERT
(
std
::
is_same
<
copy_t
MEGDNN_COMMA
int4
>::
value
==
true
,
"ldg data type must be int4 for this memory visitor"
)
;
"ldg data type must be int4 for this memory visitor"
)
__device__
__forceinline__
void
init_stride
(
Layout
<
Format
::
CHWN4
>
layout
)
{
stride
=
layout
.
channel_stride
/
TileCount
::
ldg_load_width
;
...
...
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_cixwoxn.cuh
浏览文件 @
14e9ad62
...
...
@@ -66,7 +66,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x;
copy_t
reg
[
TileCount
::
reg_h
][
TileCount
::
reg_w
][
TileCount
::
reg_d
];
MEGDNN_STATIC_ASSERT
(
std
::
is_same
<
copy_t
MEGDNN_COMMA
int4
>::
value
==
true
,
"ldg data type must be int4 for this memory visitor"
)
;
"ldg data type must be int4 for this memory visitor"
)
__device__
__forceinline__
void
init_stride
(
Layout
<
Format
::
CHWN4
>
layout
)
{
stride
=
layout
.
channel_stride
/
TileCount
::
ldg_load_width
;
...
...
dnn/src/cuda/convolution_helper/global_memory_visitor/global_memory_visitor_imma_fwxco.cuh
浏览文件 @
14e9ad62
...
...
@@ -63,7 +63,7 @@ const int gl_load_x = tid - gl_load_y * TileCount::load_x;
copy_t
reg
[
TileCount
::
reg_h
][
TileCount
::
reg_w
];
MEGDNN_STATIC_ASSERT
(
std
::
is_same
<
copy_t
MEGDNN_COMMA
int4
>::
value
==
true
,
"ldg data type must be int4 for this memory visitor"
)
;
"ldg data type must be int4 for this memory visitor"
)
__device__
__forceinline__
void
init_stride
(
Layout
<
Format
::
CHWN16
>
layout
)
{
stride
=
layout
.
width_stride
/
TileCount
::
ldg_load_width
;
...
...
dnn/src/cuda/cumsum/kern_impl.cu
浏览文件 @
14e9ad62
...
...
@@ -12,6 +12,7 @@
#include "./kern.cuh"
#include "./kern_helper.cuh"
#include "./kern_impl.cuinl"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
::
cuda
;
using
namespace
cumsum
::
detail
::
cubwrap
;
...
...
@@ -88,4 +89,5 @@ void cumsum::get_BX_BY(
BY
=
512
/
BX
;
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
dnn/src/cuda/elemwise/special_kerns.inl
浏览文件 @
14e9ad62
...
...
@@ -125,7 +125,7 @@ struct OpCallerBinary<FuseOpWrapper<Op>, PVis0, PVis1> {
PVis0
par0
;
PVis1
par1
;
MEGDNN_STATIC_ASSERT
(
PVis0
::
packed_size
==
PVis1
::
packed_size
,
"vector size mismatch"
)
;
PVis0
::
packed_size
==
PVis1
::
packed_size
,
"vector size mismatch"
)
static
const
uint32_t
packed_size
=
PVis0
::
packed_size
;
__device__
__forceinline__
void
thread_init
(
uint32_t
idx
)
{
...
...
dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh
0 → 100644
浏览文件 @
14e9ad62
/**
* \file dnn/src/cuda/kernel_common/diagnostic_epilogue.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#ifdef __GNUC__
#if CUDA_VERSION < 9020
#pragma GCC diagnostic pop
#endif
#endif
#ifdef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED
#undef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED
#else
#error "diagnostic_epilogue.h must be included after diagnostic_prologue.h"
#endif
// vim: syntax=cpp.doxygen
dnn/src/cuda/kernel_common/diagnostic_prologue.cuh
0 → 100644
浏览文件 @
14e9ad62
/**
* \file dnn/src/cuda/kernel_common/diagnostic_prologue.cuh
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#ifdef MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED
#error "diagnostic_prologue.h included twice without including diagnostic_epilogue.h"
#else
#define MEGDNN_DIAGNOSTIC_PROLOGUE_INCLUDED
#endif
//! see
//! https://stackoverflow.com/questions/49836419/how-to-hide-nvccs-function-was-declared-but-never-referenced-warnings
//! for more details.
#ifdef __GNUC__
#if CUDA_VERSION < 9020
#pragma GCC diagnostic push
#pragma diag_suppress 177 // suppress "function was declared but never referenced
// warning"
#endif
#endif
// vim: syntax=cpp.doxygen
dnn/src/cuda/local_share/backward_data/local_share_bwd_data_f32_implicit_gemm.cu
浏览文件 @
14e9ad62
...
...
@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./local_share_bwd_data.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -580,4 +581,5 @@ void megdnn::cuda::local_share_bwd_data::_do_local_share_bwd_data_implicit_gemm(
}
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
dnn/src/cuda/local_share/backward_filter/local_share_bwd_filter_f32_implicit_gemm.cu
浏览文件 @
14e9ad62
...
...
@@ -10,6 +10,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./local_share_bwd_filter.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -495,4 +496,5 @@ void megdnn::cuda::local_share_bwd_filter::_do_local_share_bwd_filter_implicit_g
}
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware.cu
浏览文件 @
14e9ad62
...
...
@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./local_share_forward.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -54,7 +55,7 @@ struct FilterTileCount {
MEGDNN_STATIC_ASSERT
(
smem_w
%
ThreadConfig
::
nr_thread_x
==
0
,
"col of share memory must be divided by nr_thread_x"
)
;
"col of share memory must be divided by nr_thread_x"
)
static
int
const
reg_h
=
(
smem_h
+
ThreadConfig
::
nr_thread_y
-
1
)
/
ThreadConfig
::
nr_thread_y
;
static
int
const
reg_w
=
smem_w
/
ThreadConfig
::
nr_thread_x
;
...
...
@@ -1292,4 +1293,5 @@ void megdnn::cuda::local_share::_do_local_share_convolution_large_batch_size(
}
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
dnn/src/cuda/local_share/forward/local_share_fwd_chwn_f32_batch_size_aware_small_image.cu
浏览文件 @
14e9ad62
...
...
@@ -10,6 +10,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "./local_share_forward.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -582,4 +583,5 @@ void megdnn::cuda::local_share::
}
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
dnn/src/cuda/matrix_mul/uint4x4x32_wmma/wmma_matrix_mul_u4.cu
浏览文件 @
14e9ad62
...
...
@@ -79,7 +79,7 @@ struct GlobalToShareMemStream {
GlobalToShareMemStreamConfig_
::
BlockSize
==
GlobalToShareMemStreamConfig_
::
CACHE_SIZE
*
BlockConfig_
::
WARPS_PER_BLOCK
,
"Block size mismatch"
)
;
"Block size mismatch"
)
uint8_t
*
smem
;
const
uint8_t
*
g_ptr
;
...
...
dnn/src/cuda/pooling/pooling2d_qint.cu
浏览文件 @
14e9ad62
...
...
@@ -11,6 +11,7 @@
*/
#include "./pooling2d_qint.cuh"
#include "src/common/opr_param_defs_enumv.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/query_blocksize.cuh"
using
namespace
megdnn
;
...
...
@@ -350,7 +351,7 @@ __global__ void pooling2d_device_template_nchwc(
static
int
constexpr
section
=
pack_byte
/
sizeof
(
ldg_type
);
MEGDNN_STATIC_ASSERT
(
ldg_width
==
ldg_width_assert
,
"pooling2d (NCHW64) kernel must use 128bit width ldg instruction"
)
;
"pooling2d (NCHW64) kernel must use 128bit width ldg instruction"
)
const
int
c_packed
=
param
.
c
/
pack_size
;
const
int
batch
=
tid
/
(
param
.
ho
*
param
.
wo
*
c_packed
*
section
);
const
int
batch_residual
=
tid
-
batch
*
param
.
ho
*
param
.
wo
*
c_packed
*
section
;
...
...
@@ -402,7 +403,7 @@ __global__ void pooling2d_device_template_nhwc(
static
int
constexpr
ldg_width_bytes
=
sizeof
(
ldg_type
);
MEGDNN_STATIC_ASSERT
(
ldg_width
==
ldg_width_assert
,
"pooling2d (NHWC) kernel must ldg_width == ldg_width_assert"
)
;
"pooling2d (NHWC) kernel must ldg_width == ldg_width_assert"
)
const
int
c_packed
=
param
.
c
/
pack_size
;
const
int
batch
=
tid
/
(
param
.
ho
*
param
.
wo
*
c_packed
);
const
int
batch_residual
=
tid
-
batch
*
param
.
ho
*
param
.
wo
*
c_packed
;
...
...
@@ -691,4 +692,6 @@ void megdnn::cuda::pooling2d::do_pooling2d_int4_nhwc(
kern
<<<
nr_blocks
,
nr_threads
,
0
,
stream
>>>
(
d_src
,
d_dst
,
param
,
zero_point
);
after_kernel_launch
();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cuda.doxygen
dnn/src/cuda/query_blocksize_impl.cu
浏览文件 @
14e9ad62
...
...
@@ -9,6 +9,7 @@
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/query_blocksize.cuh"
#include "src/cuda/utils.cuh"
...
...
@@ -51,4 +52,5 @@ LaunchConfig cuda::detail::query_launch_config_for_kernel_uncached(
return
ret
;
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: ft=cpp syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
dnn/src/cuda/relayout_format/relayout_format.cu
浏览文件 @
14e9ad62
...
...
@@ -10,6 +10,7 @@
* implied.
*/
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/query_blocksize.cuh"
#include "src/cuda/relayout_format/relayout_format_kern.cuh"
...
...
@@ -104,7 +105,7 @@ __global__ void kern_nchw_nchwx(
MEGDNN_STATIC_ASSERT
(
std
::
is_same
<
SrcType
MEGDNN_COMMA
DstType
>::
value
,
"Currently this kernel only support accessing tensor "
"src and dst in same data type."
)
;
"src and dst in same data type."
)
n_stride_src
/=
size_src_type
;
ic_stride
/=
size_src_type
;
n_stride_dst
/=
size_dst_type
;
...
...
@@ -585,3 +586,5 @@ void relayout_format::relayout_format_cuda_nchw_nchw4_weight(
ic_stride
,
oc_stride_dst
,
group_stride_src
,
group_stride_dst
,
0
,
{});
after_kernel_launch
();
}
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
dnn/src/cuda/relayout_format/relayout_format_kern.cuh
浏览文件 @
14e9ad62
...
...
@@ -478,12 +478,12 @@ struct RelayoutProblem {
using
Transpose
=
Transpose_
;
using
CudaPostProcess
=
CudaPostProcess_
;
MEGDNN_STATIC_ASSERT
(
SrcIterator
::
chan_blk
==
DstIterator
::
chan_blk
,
"channel block mismatch"
)
;
SrcIterator
::
chan_blk
==
DstIterator
::
chan_blk
,
"channel block mismatch"
)
MEGDNN_STATIC_ASSERT
(
SrcIterator
::
width
==
DstIterator
::
width
,
"width block mismatch"
)
;
SrcIterator
::
width
==
DstIterator
::
width
,
"width block mismatch"
)
MEGDNN_STATIC_ASSERT
(
SrcIterator
::
size_nbits
==
DstIterator
::
size_nbits
,
"size in bits of elements mismatch"
)
;
"size in bits of elements mismatch"
)
static
constexpr
int
pack_chan
=
SrcIterator
::
chan_blk
;
static
constexpr
int
pack_width
=
SrcIterator
::
width
;
using
DnnSrcType
=
typename
CudaPostProcess
::
SrcType
;
...
...
dnn/src/cuda/relayout_format/relayout_format_utils.cuh
浏览文件 @
14e9ad62
...
...
@@ -80,7 +80,7 @@ inline __device__ DstType make_zero_pad(const uint8_t zero_point) {
template
<
>
inline
__device__
char4
make_zero_pad
<
char4
>
(
const
uint8_t
zero_point
)
{
char
izp
=
reinterpret_cast
<
const
char
&>
(
zero_point
);
signed
char
izp
=
reinterpret_cast
<
const
signed
char
&>
(
zero_point
);
return
{
izp
,
izp
,
izp
,
izp
};
}
...
...
dnn/src/cuda/resize/forward.cu
浏览文件 @
14e9ad62
...
...
@@ -16,6 +16,7 @@
#include "src/common/resize.cuh"
#include "src/cuda/cv/kernel_common.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -319,4 +320,5 @@ INST(int8_t);
}
// namespace cuda
}
// namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
dnn/src/cuda/type_cvt/kern.cu
浏览文件 @
14e9ad62
...
...
@@ -13,6 +13,7 @@
#include "megdnn/dtype.h"
#include "src/cuda/elemwise_helper.cuh"
#include "src/cuda/elemwise_helper_q4.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
...
...
@@ -410,4 +411,5 @@ MEGDNN_FOREACH_COMPUTING_CTYPE(INST_SRC_NORMAL_LOWBIT)
}
// namespace cuda
}
// namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
dnn/src/cuda/warp_affine/warp_affine.cu
浏览文件 @
14e9ad62
...
...
@@ -12,6 +12,7 @@
#include <cstdio>
#include "src/common/rounding_converter.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
#include "src/cuda/warp_affine/common.cuh"
...
...
@@ -288,4 +289,5 @@ INST(int8_t)
}
// namespace cuda
}
// namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
dnn/src/cuda/warp_perspective/forward.cu
浏览文件 @
14e9ad62
...
...
@@ -16,6 +16,7 @@
#include "src/common/rounding_converter.cuh"
#include "src/cuda/error_info.cuh"
#include "src/cuda/integer_subbyte_utils.cuh"
#include "src/cuda/kernel_common/diagnostic_prologue.cuh"
#include "src/cuda/utils.cuh"
#include "src/cuda/warp_perspective/common.cuh"
...
...
@@ -1760,4 +1761,5 @@ INST(dt_quint8, uint8_t, float)
}
// namespace cuda
}
// namespace megdnn
#include "src/cuda/kernel_common/diagnostic_epilogue.cuh"
// vim: syntax=cpp.doxygen
dnn/src/naive/relayout_format/opr_impl.cpp
浏览文件 @
14e9ad62
...
...
@@ -51,7 +51,7 @@ void lowbit_recursive_cp(
MEGDNN_STATIC_ASSERT
(
!
(
8
_z
%
size_nbits
),
"size in bits of lowbit data type can only be 1, 2, 4 "
"or 8"
)
;
"or 8"
)
if
(
idx
<
(
src
.
layout
.
ndim
-
1
))
{
for
(
size_t
i
=
0
;
i
<
src
.
layout
[
idx
];
++
i
)
{
lowbit_recursive_cp
<
size_nbits
>
(
...
...
src/opr/test/basic_arith/elemwise.cpp
浏览文件 @
14e9ad62
...
...
@@ -130,10 +130,10 @@ T do_round_mulh_saturate(T a, T b) {
MEGDNN_STATIC_ASSERT
(
std
::
numeric_limits
<
T
>::
digits
<=
32
,
"Portable RMULH is not supported for integer "
"types larger than 32 bits."
)
;
"types larger than 32 bits."
)
MEGDNN_STATIC_ASSERT
(
std
::
numeric_limits
<
T
>::
is_integer
,
"Input types should be integer for RMULH"
)
;
"Input types should be integer for RMULH"
)
bool
overflow
=
a
==
b
&&
a
==
DTypeTrait
<
T
>::
min
();
// TODO: This really should be
// rounding_shift_right_away_from_zero, but we haven't yet found a fast
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录