Commit 758549b9
Authored Mar 19, 2022 by Megvii Engine Team
feat(megengine): support tx2
GitOrigin-RevId: d1175a1f4a2dded294a12e20016d54898455c58d
Parent: 84ce94fb

Showing 9 changed files with 37 additions and 61 deletions (+37, -61)
dnn/test/cuda/accuracy_shake.cpp    +7   -1
dnn/test/cuda/benchmark.cpp         +2   -6
dnn/test/cuda/convolution.cpp       +11  -33
dnn/test/cuda/convolution3d.cpp     +2   -2
dnn/test/cuda/group_conv.cpp        +2   -2
dnn/test/cuda/group_conv3d.cpp      +2   -2
dnn/test/cuda/matrix_mul.cpp        +3   -14
dnn/test/cuda/reduce.cpp            +1   -1
dnn/test/cuda/utils.cpp             +7   -0
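The changes follow one mechanical pattern: per-test guards of the form "if (!cuda::is_compute_capability_required(...)) { printf(...); return; }" are replaced by a single require_compute_capability(major, minor) call, the include of "src/cuda/utils.h" becomes "test/cuda/utils.h", and remaining capability queries switch to check_compute_capability, which (see dnn/test/cuda/utils.cpp at the end of the diff) now treats sm_62, i.e. the Jetson TX2, specially. The require_compute_capability helper itself is not shown in this commit; since it must return from the enclosing TEST_F body, it is presumably a macro in test/cuda/utils.h, roughly like this hypothetical sketch:

    // Hypothetical sketch only -- the actual definition lives in
    // test/cuda/utils.h, which this commit does not display.
    #define require_compute_capability(major, minor)                      \
        do {                                                              \
            if (!check_compute_capability((major), (minor))) {            \
                printf("skip test, compute capability below %d.%d\n",     \
                       (major), (minor));                                 \
                return; /* early-exits the enclosing TEST_F body */       \
            }                                                             \
        } while (0)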
dnn/test/cuda/accuracy_shake.cpp

@@ -96,8 +96,8 @@ TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NHWC) {
 }
 
 TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
-    using Format = ConvBias::Param::Format;
     require_compute_capability(7, 5);
+    using Format = ConvBias::Param::Format;
     AccuracyShakeChecker<ConvBiasForward> checker(handle_cuda());
     UniformIntRNG int_rng{-5, 5};
     UniformFloatRNG float_rng{-50, 50};

@@ -135,6 +135,7 @@ TEST_F(CUDA, SHAKE_CONV_BIAS_FORWARD_QS8_NCHWX) {
 }
 
 TEST_F(CUDA, SHAKE_MATRIX_MUL_FORWARD) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<MatrixMul> checker(handle_cuda());
     checker.set_dtype(0, dtype::Float32())

@@ -167,6 +168,7 @@ TEST_F(CUDA, SHAKE_BATCH_CONV_BIAS_QS8) {
 }
 
 TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<BatchedMatrixMul> checker(handle_cuda());
     UniformIntRNG int_rng{-127, 127};

@@ -189,6 +191,7 @@ TEST_F(CUDA, SHAKE_BATCHED_MATRIX_MUL) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<Convolution3DForward> checker(handle_cuda());
     NormalRNG default_rng;
     float scale = 1.0f / sqrt(5);

@@ -207,6 +210,7 @@ TEST_F(CUDA, SHAKE_CONVOLUTION3D_FORWARD) {
 }
 
 TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<LocalShare> checker(handle_cuda());
     using Param = LocalShare::Param;
     Param param;

@@ -216,6 +220,7 @@ TEST_F(CUDA, SHAKE_LOCAL_SHARE) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<ConvolutionBackwardData> checker(handle_cuda());
     NormalRNG default_rng;
     checker.set_dtype(0, dtype::Float32())

@@ -229,6 +234,7 @@ TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_DATA) {
 }
 
 TEST_F(CUDA, SHAKE_CONVOLUTION_BACKWARD_FILTER) {
+    require_compute_capability(6, 1);
     AccuracyShakeChecker<ConvolutionBackwardFilter> checker(handle_cuda());
     NormalRNG default_rng;
     checker.set_dtype(0, dtype::Float32())
dnn/test/cuda/benchmark.cpp

@@ -11,11 +11,11 @@
 #include "test/cuda/fixture.h"
 
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/benchmarker.h"
 #include "test/common/tensor.h"
 #include "test/common/timer.h"
 #include "test/common/workspace_wrapper.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {

@@ -23,11 +23,7 @@ namespace test {
 #if MEGDNN_WITH_BENCHMARK
 
 TEST_F(CUDA, BENCHMARK_CONVOLUTION_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.BENCHMARK_CONVOLUTION_8X8X32 test as current device"
-               "doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using Param = param::Convolution;
     auto run_1x1 = [&](size_t N, size_t OC, size_t IC, size_t H, size_t W) {
         Benchmarker<Convolution> benchmarker(handle_cuda());
dnn/test/cuda/convolution.cpp

@@ -13,7 +13,6 @@
 #include "megdnn/dtype.h"
 #include "megdnn/opr_param_defs.h"
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/accuracy_shake_checker.h"
 #include "test/common/checker.h"
 #include "test/common/rng.h"

@@ -21,6 +20,7 @@
 #include "test/common/workspace_wrapper.h"
 #include "test/cuda/benchmark.h"
 #include "test/cuda/fixture.h"
+#include "test/cuda/utils.h"
 
 #define V1(x) #x
 #define V(x) V1(x)

@@ -31,11 +31,7 @@ namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, CONVOLUTION_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
-               "doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args;

@@ -116,8 +112,7 @@ TEST_F(CUDA, CONVOLUTION_FORWARD) {
 }
 
 TEST_F(CUDA, CONV_FORWARD_MATMUL_NCHW4) {
-    if (!cuda::is_compute_capability_required(6, 1))
-        return;
+    require_compute_capability(6, 1);
     using namespace convolution;
     Checker<Convolution> checker(handle_cuda());
     UniformIntRNG int_rng{-127, 127};

@@ -205,7 +200,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA) {
                 .set_epsilon(1e-3)
                 .set_param(arg.param)
                 .exec(TensorLayoutArray{filter, dst, src});
-        if (!cuda::is_compute_capability_required(6, 0)) {
+        if (!check_compute_capability(6, 0)) {
            src.dtype = dst.dtype = filter.dtype = dtype::Float16();
            checker.set_rng(0, &rng)
                    .set_rng(1, &rng)

@@ -315,8 +310,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_NHWC) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_CUDNN) {
-    if (cuda::is_compute_capability_required(7, 0))
-        return;
+    require_compute_capability(7, 0);
     using namespace convolution;
     Checker<ConvolutionBackwardData> checker(handle_cuda());
     checker.set_before_exec_callback(

@@ -372,11 +366,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_MATMUL) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A test as "
-               "current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nchw4_conv_bwd_data();

@@ -430,12 +420,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW4_DP4A) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A test as "
-               "current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nchw_conv_bwd_data();
     Checker<ConvolutionBackwardData> checker(handle_cuda());

@@ -463,11 +448,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NCHW_DP4A) {
 #if CUDA_VERSION >= 10020
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NHWC_IMMA) {
-    if (!cuda::is_compute_capability_required(7, 5)) {
-        printf("Skip CUDA.CONVOLUTION_BACKWARD_DATA_INT8_NHWC_IMMA test as "
-               "current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(7, 5);
     using namespace convolution;
     std::vector<TestArg> args = get_args_int8_nhwc_conv_bwd_data();

@@ -527,8 +508,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_INT8_NHWC_IMMA) {
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_FAILED_CUDNN7_5) {
     // BRAIN-481 failed on architectures 7.0, remove the following if statement,
     // when cudnn fixed the problem.
-    if (cuda::is_compute_capability_required(7, 0))
-        return;
+    require_compute_capability(7, 0);
     using namespace convolution;
     std::vector<TestArg> args = get_args_cudnn_7_5_failures();
     Checker<ConvolutionBackwardData> checker(handle_cuda());

@@ -662,8 +642,7 @@ TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_MATMUL) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_FILTER_CUDNN) {
-    if (cuda::is_compute_capability_required(7, 0))
-        return;
+    require_compute_capability(7, 0);
     using namespace convolution;
     Checker<ConvolutionBackwardFilter> checker(handle_cuda());
     checker.set_before_exec_callback(

@@ -697,8 +676,7 @@ TEST_F(CUDA, CONV_CONFIG_COMBINATIONS) {
 }
 
 TEST_F(CUDA, CONVOLUTION_BACKWARD_DATA_1) {
-    if (cuda::is_compute_capability_required(7, 0))
-        return;
+    require_compute_capability(7, 0);
     using namespace convolution;
     Checker<ConvolutionBackwardData> checker(handle_cuda());
     checker.set_before_exec_callback(AlgoChecker<ConvolutionBackwardData>(
dnn/test/cuda/convolution3d.cpp

@@ -11,20 +11,20 @@
 #include "test/common/convolution3d.h"
 #include "megdnn/opr_param_defs.h"
 #include "megdnn/oprs.h"
-#include "src/cuda/utils.h"
 #include "test/common/benchmarker.h"
 #include "test/common/checker.h"
 #include "test/common/rng.h"
 #include "test/common/tensor.h"
 #include "test/common/workspace_wrapper.h"
 #include "test/cuda/fixture.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 #if 0
 TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
+    if (!check_compute_capability(6, 1)) {
         printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
                "doesn't support\n");
         return;
dnn/test/cuda/group_conv.cpp

@@ -15,13 +15,13 @@
 #include "test/common/convolution.h"
 #include "test/cuda/fixture.h"
-#include "src/cuda/utils.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, GROUP_CONV_FORWARD) {
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     auto run = [&](size_t N, size_t IC, size_t IH, size_t IW, size_t FH,
                    size_t FW, size_t OC, size_t /* OH */, size_t /* OW */,
                    size_t PH, size_t PW, size_t SH, size_t SW, size_t DH,
                    size_t DW, size_t group) {
dnn/test/cuda/group_conv3d.cpp

@@ -15,13 +15,13 @@
 #include "test/common/convolution3d.h"
 #include "test/cuda/fixture.h"
-#include "src/cuda/utils.h"
+#include "test/cuda/utils.h"
 
 namespace megdnn {
 namespace test {
 
 TEST_F(CUDA, GROUP_CONVOLUTION3D_FORWARD) {
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     static_cast<void>(is_int_available);
     auto run = [&](size_t N, size_t IC, size_t ID, size_t IH, size_t IW,
                    size_t FD, size_t FH, size_t FW, size_t OC, size_t PD,
                    size_t PH, size_t PW,
dnn/test/cuda/matrix_mul.cpp

@@ -15,7 +15,6 @@
 #include "test/common/checker.h"
 #include "test/common/matrix_mul.h"
-#include "src/cuda/utils.h"
 
 #if defined(cuda_check)
 #undef cuda_check
 #endif

@@ -130,10 +129,7 @@ TEST_F(CUDA, PEAK_BENCHMARK_MATRIX_MUL_QUANTIZED4x4x32) {
 #endif
 
 TEST_F(CUDA, MATRIX_MUL_INT8x8x32_WITH_SPETIAL_STRIDES) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     Checker<MatrixMul> checker(handle_cuda());
     using Param = MatrixMul::Param;
     Param param;

@@ -152,10 +148,7 @@ TEST_F(CUDA, MATRIX_MUL_INT8x8x32_WITH_SPETIAL_STRIDES) {
 }
 
 TEST_F(CUDA, MATRIX_MUL_INT8x8x32_NAIVE) {
-    if (!cuda::is_compute_capability_required(6, 1)) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
+    require_compute_capability(6, 1);
     using Param = MatrixMul::Param;
     UniformIntRNG rng{-128, 127};

@@ -224,16 +217,12 @@ TEST_F(CUDA, MATRIX_MUL_FLOAT_NAIVE) {
 }
 
 TEST_F(CUDA, MATRIX_MUL) {
-    if (cuda::current_device_prop().major < 6) {
-        printf("Skip CUDA.MATRIX_MUL test as current device doesn't support\n");
-        return;
-    }
     Checker<MatrixMul> checker(handle_cuda());
     using Param = MatrixMul::Param;
     size_t m = 12, n = 16, k = 20;
 
-    bool is_int_available = cuda::is_compute_capability_required(6, 1);
+    bool is_int_available = check_compute_capability(6, 1);
     std::vector<DType> dtype_array;
     dtype_array.push_back(dtype::Float32());
     dtype_array.push_back(dtype::Float16());
dnn/test/cuda/reduce.cpp

@@ -41,7 +41,7 @@ TEST_F(CUDA, REDUCE) {
     checker.execs({{2, 16 * 16 + 1, 31}, {}});
     checker.execs({{2, 16 * 16 * 16 + 1, 31}, {}});
     checker.execs({{2, 16 * 16 * 16 * 16 + 1, 31}, {}});
-#if MEGDNN_TEGRA_X1
+#if MEGDNN_TEGRA_X1 || MEGDNN_TEGRA_X2
     checker.execs({{2, 8 * 16 * 16 * 16 * 16 + 1, 31}, {}});
 #else
     checker.execs({{2, 16 * 16 * 16 * 16 * 16 + 1, 31}, {}});
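A note on the guard above: MEGDNN_TEGRA_X1 is the existing build-time macro for the Jetson TX1, and this hunk extends the branch with MEGDNN_TEGRA_X2, presumably defined by the build system when targeting the TX2, so both boards run the smaller 8 * 16 * 16 * 16 * 16 + 1 reduction case instead of the full-size one.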
dnn/test/cuda/utils.cpp

@@ -18,6 +18,13 @@ bool check_compute_capability(int major, int minor) {
     cuda_check(cudaGetDevice(&dev));
     cudaDeviceProp prop;
     cuda_check(cudaGetDeviceProperties(&prop, dev));
+    //! we just skip sm_62 here, which means jetson tx2
+    //! unless require sm_62 explicitly
+    if (prop.major == 6 && prop.minor == 2) {
+        return prop.major == major && prop.minor == minor;
+    }
     return prop.major > major || (prop.major == major && prop.minor >= minor);
 }
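The effect of the new special case: on an sm_62 device (Jetson TX2), check_compute_capability matches only an explicit request for 6.2, so every test above that requires 6.1 or 7.x is now skipped on TX2 instead of running and failing. A self-contained sketch of the decision logic for illustration (re-stated outside CUDA; the real function queries cudaGetDeviceProperties):

    #include <cstdio>

    // Re-statement of the updated check, for illustration only.
    static bool check_cc(int dev_major, int dev_minor, int major, int minor) {
        // New special case from this commit: sm_62 (Jetson TX2) passes only
        // when 6.2 is requested explicitly.
        if (dev_major == 6 && dev_minor == 2)
            return dev_major == major && dev_minor == minor;
        // Unchanged default: ordinary ">= major.minor" comparison.
        return dev_major > major || (dev_major == major && dev_minor >= minor);
    }

    int main() {
        std::printf("%d\n", check_cc(6, 2, 6, 1));  // 0: TX2 skips a 6.1 requirement
        std::printf("%d\n", check_cc(6, 2, 6, 2));  // 1: explicit sm_62 request passes
        std::printf("%d\n", check_cc(6, 1, 6, 1));  // 1: other devices keep >= semantics
        return 0;
    }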