Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
e4b71bdf
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
e4b71bdf
编写于
1月 26, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(megdnn): remove unnessary 1x1 algo
GitOrigin-RevId: 809f9b2bbaabdafe31bc6e004a8e1a2cd9d18a2c
上级
44c8d2d1
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
1 addition
and
158 deletion
+1
-158
dnn/src/cuda/conv_bias/1x1.cpp
dnn/src/cuda/conv_bias/1x1.cpp
+0
-113
dnn/src/cuda/conv_bias/algo.cpp
dnn/src/cuda/conv_bias/algo.cpp
+0
-2
dnn/src/cuda/conv_bias/algo.h
dnn/src/cuda/conv_bias/algo.h
+0
-27
dnn/src/cuda/conv_bias/opr_impl.cpp
dnn/src/cuda/conv_bias/opr_impl.cpp
+0
-9
dnn/test/cuda/conv_bias.cpp
dnn/test/cuda/conv_bias.cpp
+0
-6
dnn/test/cuda/group_conv.cpp
dnn/test/cuda/group_conv.cpp
+1
-1
未找到文件。
dnn/src/cuda/conv_bias/1x1.cpp
已删除
100644 → 0
浏览文件 @
44c8d2d1
/**
* \file dnn/src/cuda/conv_bias/1x1.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "src/common/conv_bias.h"
#include "src/cuda/conv_bias/algo.h"
#include "src/cuda/handle.h"
#include "src/cuda/utils.cuh"
using
namespace
megdnn
;
using
namespace
cuda
;
using
namespace
conv_bias
;
bool
ConvBiasForwardImpl
::
Algo1x1
::
is_available
(
const
SizeArgs
&
args
)
const
{
if
(
args
.
z_layout
->
ndim
>
0
)
return
false
;
auto
&&
fm
=
args
.
filter_meta
;
return
fm
.
format
==
Param
::
Format
::
NCHW
&&
(
fm
.
dtype
.
enumv
()
==
DTypeEnum
::
Float32
||
fm
.
dtype
.
enumv
()
==
DTypeEnum
::
Float16
)
&&
fm
.
spatial_ndim
==
2
&&
fm
.
group
==
1
&&
fm
.
dilation
[
0
]
==
1
&&
fm
.
dilation
[
1
]
==
1
&&
fm
.
spatial
[
0
]
==
1
&&
fm
.
spatial
[
1
]
==
1
&&
fm
.
padding
[
0
]
==
0
&&
fm
.
padding
[
1
]
==
0
&&
fm
.
stride
[
0
]
==
1
&&
fm
.
stride
[
1
]
==
1
;
}
void
ConvBiasForwardImpl
::
Algo1x1
::
extract_matmul_layouts
(
const
SizeArgs
&
args
,
TensorLayout
&
A
,
TensorLayout
&
B
,
TensorLayout
&
C
)
{
auto
&&
fm
=
args
.
filter_meta
;
A
=
{{
fm
.
ocpg
,
fm
.
icpg
},
fm
.
dtype
};
B
.
ndim
=
2
;
B
.
shape
[
0
]
=
args
.
src_layout
->
shape
[
1
];
B
.
shape
[
1
]
=
args
.
src_layout
->
shape
[
2
]
*
args
.
src_layout
->
shape
[
3
];
B
.
stride
[
0
]
=
args
.
src_layout
->
stride
[
1
];
B
.
stride
[
1
]
=
1
;
B
.
dtype
=
args
.
src_layout
->
dtype
;
C
=
{{
args
.
dst_layout
->
shape
[
1
],
B
.
shape
[
1
]},
args
.
dst_layout
->
dtype
};
}
WorkspaceBundle
ConvBiasForwardImpl
::
Algo1x1
::
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
{
auto
dst_layout
=
*
args
.
dst_layout
;
SmallVector
<
size_t
>
sizes
;
if
(
dst_layout
.
dtype
.
enumv
()
!=
args
.
bias_layout
->
dtype
.
enumv
())
{
dst_layout
.
dtype
=
DType
();
args
.
opr
->
check_or_deduce_dtype_fwd
(
args
.
src_layout
->
dtype
,
args
.
filter_layout
->
dtype
,
dst_layout
.
dtype
);
sizes
.
push_back
(
dst_layout
.
span
().
dist_byte
());
}
SizeArgs
conv_args
=
args
;
conv_args
.
dst_layout
=
&
dst_layout
;
TensorLayout
A
,
B
,
C
;
extract_matmul_layouts
(
conv_args
,
A
,
B
,
C
);
sizes
.
insert
(
sizes
.
begin
(),
args
.
handle
->
matmul_opr
()
->
get_workspace_in_bytes
(
A
,
B
,
C
));
return
{
ptr
,
std
::
move
(
sizes
)};
}
size_t
ConvBiasForwardImpl
::
Algo1x1
::
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
{
return
get_workspace_bundle
(
nullptr
,
args
).
total_size_in_bytes
();
}
void
ConvBiasForwardImpl
::
Algo1x1
::
exec
(
const
ExecArgs
&
args
)
const
{
auto
bundle
=
get_workspace_bundle
(
args
.
workspace
.
raw_ptr
,
args
);
auto
conv_dst_tensor
=
*
args
.
dst_tensor
;
if
(
args
.
dst_layout
->
dtype
.
enumv
()
!=
args
.
bias_layout
->
dtype
.
enumv
())
{
conv_dst_tensor
.
raw_ptr
=
bundle
.
get
(
1
);
conv_dst_tensor
.
layout
.
dtype
=
DType
();
args
.
opr
->
check_or_deduce_dtype_fwd
(
args
.
src_layout
->
dtype
,
args
.
filter_layout
->
dtype
,
conv_dst_tensor
.
layout
.
dtype
);
}
ExecArgs
conv_args
=
args
;
conv_args
.
dst_tensor
=
&
conv_dst_tensor
;
conv_args
.
dst_layout
=
&
conv_dst_tensor
.
layout
;
{
TensorND
A
,
B
,
C
;
extract_matmul_layouts
(
conv_args
,
A
.
layout
,
B
.
layout
,
C
.
layout
);
A
.
raw_ptr
=
conv_args
.
filter_tensor
->
raw_ptr
;
B
.
raw_ptr
=
conv_args
.
src_tensor
->
raw_ptr
;
C
.
raw_ptr
=
conv_args
.
dst_tensor
->
raw_ptr
;
size_t
batch
=
conv_args
.
src_layout
->
shape
[
0
];
auto
mm
=
conv_args
.
handle
->
matmul_opr
();
auto
strd_B
=
conv_args
.
src_layout
->
stride
[
0
]
*
conv_args
.
src_layout
->
dtype
.
size
(),
strd_C
=
conv_args
.
dst_layout
->
stride
[
0
]
*
conv_args
.
dst_layout
->
dtype
.
size
();
for
(
size_t
i
=
0
;
i
<
batch
;
++
i
)
{
mm
->
exec
(
A
,
B
,
C
,
bundle
.
get_workspace
(
0
));
incr_voidp
(
B
.
raw_ptr
,
strd_B
);
incr_voidp
(
C
.
raw_ptr
,
strd_C
);
}
}
handle_bias_and_nonlinear
(
args
.
handle
,
args
.
nonlinear_mode
,
&
conv_dst_tensor
,
args
.
dst_tensor
,
args
.
bias_tensor
);
}
// vim: syntax=cpp.doxygen
dnn/src/cuda/conv_bias/algo.cpp
浏览文件 @
e4b71bdf
...
...
@@ -24,7 +24,6 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
non_cudnn_algos
.
push_back
(
&
matmul
);
non_cudnn_algos
.
push_back
(
&
matmul8x8x32
);
non_cudnn_algos
.
push_back
(
&
batched_matmul
);
non_cudnn_algos
.
push_back
(
&
a1x1
);
fill_cudnn_algos
();
for
(
auto
&&
algo
:
cudnn_conv_bias_activations
)
{
...
...
@@ -43,7 +42,6 @@ ConvBiasForwardImpl::AlgoPack::AlgoPack() {
conv_algos
.
push_back
(
&
matmul
);
conv_algos
.
push_back
(
&
matmul8x8x32
);
conv_algos
.
push_back
(
&
batched_matmul
);
conv_algos
.
push_back
(
&
a1x1
);
conv_algos
.
reserve
(
conv_algos
.
size
()
*
2
);
//! add gconv algos by AlgoGroupConvGeneral
...
...
dnn/src/cuda/conv_bias/algo.h
浏览文件 @
e4b71bdf
...
...
@@ -51,7 +51,6 @@ public:
CUDA_INPLACE_MATMUL
,
CUDA_MATMUL
,
CUDA_MATMUL_INT8X8X32
,
CUDA_1X1
,
CUDA_BATCHED_MATMUL
,
CUDA_GROUP_CONV_GENERAL
,
CUDA_WMMA_UINT4X4X32
,
...
...
@@ -358,31 +357,6 @@ private:
mutable
std
::
string
m_name
;
};
//! optimized 1x1 conv
class
ConvBiasForwardImpl
::
Algo1x1
final
:
public
AlgoBase
{
static
void
extract_matmul_layouts
(
const
SizeArgs
&
args
,
TensorLayout
&
A
,
TensorLayout
&
B
,
TensorLayout
&
C
);
public:
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
const
char
*
name
()
const
override
{
if
(
m_name
.
empty
())
{
m_name
=
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"MATMUL1X1"
,
{});
}
return
m_name
.
c_str
();
}
bool
is_reproducible
()
const
override
{
return
true
;
}
MEGDNN_DECL_ALGO_TYPE
(
CUDA_1X1
)
private:
WorkspaceBundle
get_workspace_bundle
(
void
*
ptr
,
const
SizeArgs
&
args
)
const
;
mutable
std
::
string
m_name
;
};
class
ConvBiasForwardImpl
::
AlgoBatchedMatmul
final
:
public
AlgoBase
{
static
void
extract_matmul_layouts
(
const
SizeArgs
&
args
,
TensorLayout
&
A
,
TensorLayout
&
B
,
TensorLayout
&
C
);
...
...
@@ -738,7 +712,6 @@ public:
AlgoMatmul
matmul
;
AlgoMatmul8x8x32
matmul8x8x32
;
AlgoBatchedMatmul
batched_matmul
;
Algo1x1
a1x1
;
std
::
vector
<
AlgoInt8NCHW4DotProdImplicitGemm
>
int8_nchw4_dotprod
;
AlgoInt8CHWN4DotProdImplicitGemm
int8_chwn4_dotprod
;
#if CUDA_VERSION >= 10000
...
...
dnn/src/cuda/conv_bias/opr_impl.cpp
浏览文件 @
e4b71bdf
...
...
@@ -118,9 +118,6 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
if
(
sm_algo_pack
.
batched_matmul
.
is_available_reproducible
(
size_arg
,
reproducible
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
batched_matmul
;
}
else
if
(
sm_algo_pack
.
a1x1
.
is_available_reproducible
(
size_arg
,
reproducible
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
a1x1
;
}
return
nullptr
;
};
...
...
@@ -178,12 +175,6 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
return
algo
;
}
int
batch
=
src
[
0
];
if
(
batch
==
1
&&
sm_algo_pack
.
a1x1
.
is_available_reproducible
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
a1x1
;
}
// modify conv_args dst_layout
conv_args
.
dst_layout
=
&
dst_layout
;
if
(
is_cudnn_supported
(
conv_args
))
{
...
...
dnn/test/cuda/conv_bias.cpp
浏览文件 @
e4b71bdf
...
...
@@ -834,12 +834,6 @@ TEST_F(CUDA, CONV_BIAS_FORWARD_MATMUL_1x1) {
.
set_epsilon
(
1e-3
);
for
(
auto
&&
arg
:
args
)
{
checker
.
set_param
(
arg
.
param
);
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"MATMUL1X1"
,
{})
.
c_str
()));
checker
.
execs
({
arg
.
src
,
arg
.
filter
,
arg
.
bias
,
{},
{}});
checker
.
set_before_exec_callback
(
conv_bias
::
ConvBiasAlgoChecker
<
ConvBias
>
(
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
...
...
dnn/test/cuda/group_conv.cpp
浏览文件 @
e4b71bdf
...
...
@@ -115,7 +115,7 @@ TEST_F(CUDA, GROUP_CONV_FORWARD_1x1) {
#if CUDNN_MAJOR <= 6
std
::
string
conv1x1_name
=
ConvBiasForward
::
algo_name
<
ConvBiasForward
::
MatmulParam
>
(
"
MATMUL1X1
"
,
{});
"
BATCHEDMATMUL
"
,
{});
checker
.
set_before_exec_callback
(
AlgoChecker
<
ConvolutionForward
>
(
ExecutionPolicyAlgoName
{
"DEFAULT"
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录