Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
f5833a52
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
f5833a52
编写于
4月 15, 2020
作者:
M
Megvii Engine Team
提交者:
Xinran Xu
4月 22, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(dnn/cuda): fix cublas matmul on sm60
GitOrigin-RevId: 3fc0c30a23f1dfe35d6629595b2cb1a8c2f379c5
上级
6bd09b38
变更
9
隐藏空白更改
内联
并排
Showing
9 changed files
with
20 additions
and
13 deletions
+20
-13
dnn/src/cuda/conv_bias/matmul_8x8x32.cpp
dnn/src/cuda/conv_bias/matmul_8x8x32.cpp
+1
-1
dnn/src/cuda/matrix_mul/cublas.cpp
dnn/src/cuda/matrix_mul/cublas.cpp
+1
-1
dnn/test/cuda/benchmark.cpp
dnn/test/cuda/benchmark.cpp
+1
-1
dnn/test/cuda/conv_bias.cpp
dnn/test/cuda/conv_bias.cpp
+3
-3
dnn/test/cuda/convolution.cpp
dnn/test/cuda/convolution.cpp
+2
-2
dnn/test/cuda/convolution3d.cpp
dnn/test/cuda/convolution3d.cpp
+1
-1
dnn/test/cuda/group_conv.cpp
dnn/test/cuda/group_conv.cpp
+1
-1
dnn/test/cuda/group_conv3d.cpp
dnn/test/cuda/group_conv3d.cpp
+1
-1
dnn/test/cuda/matrix_mul.cpp
dnn/test/cuda/matrix_mul.cpp
+9
-2
未找到文件。
dnn/src/cuda/conv_bias/matmul_8x8x32.cpp
浏览文件 @
f5833a52
...
@@ -21,7 +21,7 @@ bool ConvBiasForwardImpl::AlgoMatmul8x8x32::is_available(
...
@@ -21,7 +21,7 @@ bool ConvBiasForwardImpl::AlgoMatmul8x8x32::is_available(
const
SizeArgs
&
args
)
const
{
const
SizeArgs
&
args
)
const
{
if
(
args
.
z_layout
->
ndim
>
0
)
if
(
args
.
z_layout
->
ndim
>
0
)
return
false
;
return
false
;
if
(
cuda
::
current_device_prop
().
major
<
6
)
if
(
!
is_compute_capability_required
(
6
,
1
)
)
return
false
;
return
false
;
auto
dst_layout
=
*
args
.
dst_layout
;
auto
dst_layout
=
*
args
.
dst_layout
;
...
...
dnn/src/cuda/matrix_mul/cublas.cpp
浏览文件 @
f5833a52
...
@@ -42,7 +42,7 @@ bool MatrixMulForwardImpl::AlgoCuBlas::is_available(
...
@@ -42,7 +42,7 @@ bool MatrixMulForwardImpl::AlgoCuBlas::is_available(
*/
*/
return
args
.
layout_a
.
stride
[
0
]
%
4
==
0
&&
return
args
.
layout_a
.
stride
[
0
]
%
4
==
0
&&
args
.
layout_b
.
stride
[
0
]
%
4
==
0
&&
args
.
layout_b
.
stride
[
0
]
%
4
==
0
&&
current_device_prop
().
major
>
5
;
is_compute_capability_required
(
6
,
1
)
;
}
}
return
false
;
return
false
;
}
}
...
...
dnn/test/cuda/benchmark.cpp
浏览文件 @
f5833a52
...
@@ -24,7 +24,7 @@ namespace test {
...
@@ -24,7 +24,7 @@ namespace test {
TEST_F
(
CUDA
,
BENCHMARK_CONVOLUTION_8X8X32
)
TEST_F
(
CUDA
,
BENCHMARK_CONVOLUTION_8X8X32
)
{
{
if
(
cuda
::
current_device_prop
().
major
<
6
)
{
if
(
!
cuda
::
is_compute_capability_required
(
6
,
1
)
)
{
printf
(
"Skip CUDA.BENCHMARK_CONVOLUTION_8X8X32 test as current device"
printf
(
"Skip CUDA.BENCHMARK_CONVOLUTION_8X8X32 test as current device"
"doesn't support
\n
"
);
"doesn't support
\n
"
);
return
;
return
;
...
...
dnn/test/cuda/conv_bias.cpp
浏览文件 @
f5833a52
...
@@ -325,7 +325,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE_SMALL) {
...
@@ -325,7 +325,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_CHANWISE_SMALL) {
}
}
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_CHANWISE_8x8x32
)
{
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_CHANWISE_8x8x32
)
{
require_compute_capability
(
6
,
0
);
require_compute_capability
(
6
,
1
);
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ConvBiasForward
::
algo_name
<
ConvBias
::
DirectParam
>
(
ConvBiasForward
::
algo_name
<
ConvBias
::
DirectParam
>
(
...
@@ -472,7 +472,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL) {
...
@@ -472,7 +472,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL) {
}
}
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_MATMUL_8x8x32
)
{
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_MATMUL_8x8x32
)
{
require_compute_capability
(
6
,
0
);
require_compute_capability
(
6
,
1
);
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
...
@@ -517,7 +517,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_8x8x32) {
...
@@ -517,7 +517,7 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_8x8x32) {
}
}
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_MATMUL_NCHW4
)
{
TEST_F
(
CUDA
,
CONV_BIAS_FORWARD_MATMUL_NCHW4
)
{
require_compute_capability
(
6
,
0
);
require_compute_capability
(
6
,
1
);
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
Checker
<
ConvBiasForward
>
checker
(
handle_cuda
());
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
...
...
dnn/test/cuda/convolution.cpp
浏览文件 @
f5833a52
...
@@ -30,7 +30,7 @@ namespace test {
...
@@ -30,7 +30,7 @@ namespace test {
TEST_F
(
CUDA
,
CONVOLUTION_8X8X32
)
TEST_F
(
CUDA
,
CONVOLUTION_8X8X32
)
{
{
if
(
cuda
::
current_device_prop
().
major
<
6
)
{
if
(
!
cuda
::
is_compute_capability_required
(
6
,
1
)
)
{
printf
(
"Skip CUDA.CONVOLUTION_8X8X32 test as current device"
printf
(
"Skip CUDA.CONVOLUTION_8X8X32 test as current device"
"doesn't support
\n
"
);
"doesn't support
\n
"
);
return
;
return
;
...
@@ -112,7 +112,7 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
...
@@ -112,7 +112,7 @@ TEST_F(CUDA, CONVOLUTION_FORWARD)
}
}
TEST_F
(
CUDA
,
CONV_FORWARD_MATMUL_NCHW4
)
{
TEST_F
(
CUDA
,
CONV_FORWARD_MATMUL_NCHW4
)
{
if
(
cuda
::
current_device_prop
().
major
<
6
)
if
(
!
cuda
::
is_compute_capability_required
(
6
,
1
)
)
return
;
return
;
using
namespace
convolution
;
using
namespace
convolution
;
Checker
<
Convolution
>
checker
(
handle_cuda
());
Checker
<
Convolution
>
checker
(
handle_cuda
());
...
...
dnn/test/cuda/convolution3d.cpp
浏览文件 @
f5833a52
...
@@ -24,7 +24,7 @@ namespace test {
...
@@ -24,7 +24,7 @@ namespace test {
#if 0
#if 0
TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
TEST_F(CUDA, CONVOLUTION3D_8X8X32) {
if (
cuda::current_device_prop().major < 6
) {
if (
!cuda::is_compute_capability_required(6, 1)
) {
printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
printf("Skip CUDA.CONVOLUTION_8X8X32 test as current device"
"doesn't support\n");
"doesn't support\n");
return;
return;
...
...
dnn/test/cuda/group_conv.cpp
浏览文件 @
f5833a52
...
@@ -23,7 +23,7 @@ namespace test {
...
@@ -23,7 +23,7 @@ namespace test {
TEST_F
(
CUDA
,
GROUP_CONV_FORWARD
)
TEST_F
(
CUDA
,
GROUP_CONV_FORWARD
)
{
{
bool
is_int_available
=
(
cuda
::
current_device_prop
().
major
>=
6
);
bool
is_int_available
=
cuda
::
is_compute_capability_required
(
6
,
1
);
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
IH
,
size_t
IW
,
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
IH
,
size_t
IW
,
size_t
FH
,
size_t
FW
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
/* OH */
,
size_t
/* OW */
,
size_t
OC
,
size_t
/* OH */
,
size_t
/* OW */
,
...
...
dnn/test/cuda/group_conv3d.cpp
浏览文件 @
f5833a52
...
@@ -21,7 +21,7 @@ namespace megdnn {
...
@@ -21,7 +21,7 @@ namespace megdnn {
namespace
test
{
namespace
test
{
TEST_F
(
CUDA
,
GROUP_CONVOLUTION3D_FORWARD
)
{
TEST_F
(
CUDA
,
GROUP_CONVOLUTION3D_FORWARD
)
{
bool
is_int_available
=
(
cuda
::
current_device_prop
().
major
>=
6
);
bool
is_int_available
=
cuda
::
is_compute_capability_required
(
6
,
1
);
static_cast
<
void
>
(
is_int_available
);
static_cast
<
void
>
(
is_int_available
);
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
ID
,
size_t
IH
,
size_t
IW
,
auto
run
=
[
&
](
size_t
N
,
size_t
IC
,
size_t
ID
,
size_t
IH
,
size_t
IW
,
size_t
FD
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
PD
,
size_t
FD
,
size_t
FH
,
size_t
FW
,
size_t
OC
,
size_t
PD
,
...
...
dnn/test/cuda/matrix_mul.cpp
浏览文件 @
f5833a52
...
@@ -193,8 +193,15 @@ TEST_F(CUDA, MATRIX_MUL)
...
@@ -193,8 +193,15 @@ TEST_F(CUDA, MATRIX_MUL)
Checker
<
MatrixMul
>
checker
(
handle_cuda
());
Checker
<
MatrixMul
>
checker
(
handle_cuda
());
using
Param
=
MatrixMul
::
Param
;
using
Param
=
MatrixMul
::
Param
;
size_t
m
=
12
,
n
=
16
,
k
=
20
;
size_t
m
=
12
,
n
=
16
,
k
=
20
;
for
(
DType
dtype
:
std
::
array
<
DType
,
3
>
{
{
dtype
::
Float32
(),
dtype
::
Float16
(),
dtype
::
Int32
()}})
{
bool
is_int_available
=
cuda
::
is_compute_capability_required
(
6
,
1
);
std
::
vector
<
DType
>
dtype_array
;
dtype_array
.
push_back
(
dtype
::
Float32
());
dtype_array
.
push_back
(
dtype
::
Float16
());
if
(
is_int_available
)
dtype_array
.
push_back
(
dtype
::
Int32
());
for
(
DType
dtype
:
dtype_array
)
{
for
(
unsigned
mask
=
0
;
mask
<
4
;
++
mask
)
{
for
(
unsigned
mask
=
0
;
mask
<
4
;
++
mask
)
{
Param
param
;
Param
param
;
param
.
transposeA
=
mask
&
1
;
param
.
transposeA
=
mask
&
1
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录