Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
ec1a99ac
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
ec1a99ac
编写于
3月 04, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refactor(mgb/dnn): replace reproducible with attribute
GitOrigin-RevId: d49015714c26432d9c965231d5c6c4d60efae4bd
上级
6af0299c
变更
73
显示空白变更内容
内联
并排
Showing
73 changed file
with
680 addition
and
691 deletion
+680
-691
dnn/include/megdnn/oprs/base.h
dnn/include/megdnn/oprs/base.h
+22
-12
dnn/src/common/algo_base.cpp
dnn/src/common/algo_base.cpp
+32
-1
dnn/src/common/algo_chooser.h
dnn/src/common/algo_chooser.h
+22
-23
dnn/src/cuda/batch_conv_bias/algo.h
dnn/src/cuda/batch_conv_bias/algo.h
+4
-5
dnn/src/cuda/batch_conv_bias/opr_impl.cpp
dnn/src/cuda/batch_conv_bias/opr_impl.cpp
+10
-10
dnn/src/cuda/batch_conv_bias/opr_impl.h
dnn/src/cuda/batch_conv_bias/opr_impl.h
+1
-1
dnn/src/cuda/batched_matrix_mul/algo.h
dnn/src/cuda/batched_matrix_mul/algo.h
+4
-5
dnn/src/cuda/batched_matrix_mul/opr_impl.cpp
dnn/src/cuda/batched_matrix_mul/opr_impl.cpp
+5
-8
dnn/src/cuda/batched_matrix_mul/opr_impl.h
dnn/src/cuda/batched_matrix_mul/opr_impl.h
+1
-1
dnn/src/cuda/conv_bias/algo.h
dnn/src/cuda/conv_bias/algo.h
+4
-5
dnn/src/cuda/conv_bias/opr_impl.cpp
dnn/src/cuda/conv_bias/opr_impl.cpp
+22
-23
dnn/src/cuda/conv_bias/opr_impl.h
dnn/src/cuda/conv_bias/opr_impl.h
+1
-1
dnn/src/cuda/convolution/backward_data/algo.h
dnn/src/cuda/convolution/backward_data/algo.h
+4
-5
dnn/src/cuda/convolution/backward_filter/algo.h
dnn/src/cuda/convolution/backward_filter/algo.h
+4
-5
dnn/src/cuda/convolution/forward/algos.h
dnn/src/cuda/convolution/forward/algos.h
+6
-6
dnn/src/cuda/convolution/opr_impl.cpp
dnn/src/cuda/convolution/opr_impl.cpp
+42
-40
dnn/src/cuda/convolution/opr_impl.h
dnn/src/cuda/convolution/opr_impl.h
+13
-13
dnn/src/cuda/convolution3d/backward_data/algo.h
dnn/src/cuda/convolution3d/backward_data/algo.h
+4
-5
dnn/src/cuda/convolution3d/backward_filter/algo.h
dnn/src/cuda/convolution3d/backward_filter/algo.h
+4
-5
dnn/src/cuda/convolution3d/forward/algo.h
dnn/src/cuda/convolution3d/forward/algo.h
+4
-5
dnn/src/cuda/convolution3d/helper.h
dnn/src/cuda/convolution3d/helper.h
+11
-10
dnn/src/cuda/convolution3d/opr_impl.cpp
dnn/src/cuda/convolution3d/opr_impl.cpp
+40
-39
dnn/src/cuda/convolution3d/opr_impl.h
dnn/src/cuda/convolution3d/opr_impl.h
+12
-12
dnn/src/cuda/deformable_conv/bwd_data/algo.h
dnn/src/cuda/deformable_conv/bwd_data/algo.h
+4
-5
dnn/src/cuda/deformable_conv/bwd_flt/algo.h
dnn/src/cuda/deformable_conv/bwd_flt/algo.h
+4
-5
dnn/src/cuda/deformable_conv/fwd/algo.h
dnn/src/cuda/deformable_conv/fwd/algo.h
+4
-5
dnn/src/cuda/deformable_conv/opr_impl.cpp
dnn/src/cuda/deformable_conv/opr_impl.cpp
+32
-30
dnn/src/cuda/deformable_conv/opr_impl.h
dnn/src/cuda/deformable_conv/opr_impl.h
+7
-6
dnn/src/cuda/local_share/backward_data/algo.h
dnn/src/cuda/local_share/backward_data/algo.h
+4
-5
dnn/src/cuda/local_share/backward_filter/algo.h
dnn/src/cuda/local_share/backward_filter/algo.h
+4
-5
dnn/src/cuda/local_share/forward/algo.h
dnn/src/cuda/local_share/forward/algo.h
+4
-5
dnn/src/cuda/local_share/opr_impl.cpp
dnn/src/cuda/local_share/opr_impl.cpp
+29
-28
dnn/src/cuda/local_share/opr_impl.h
dnn/src/cuda/local_share/opr_impl.h
+3
-3
dnn/src/cuda/matrix_mul/algos.h
dnn/src/cuda/matrix_mul/algos.h
+4
-5
dnn/src/cuda/matrix_mul/opr_impl.cpp
dnn/src/cuda/matrix_mul/opr_impl.cpp
+10
-10
dnn/src/cuda/matrix_mul/opr_impl.h
dnn/src/cuda/matrix_mul/opr_impl.h
+1
-1
dnn/src/fallback/batched_matrix_mul/algos.h
dnn/src/fallback/batched_matrix_mul/algos.h
+4
-5
dnn/src/fallback/batched_matrix_mul/opr_impl.cpp
dnn/src/fallback/batched_matrix_mul/opr_impl.cpp
+6
-6
dnn/src/fallback/batched_matrix_mul/opr_impl.h
dnn/src/fallback/batched_matrix_mul/opr_impl.h
+1
-1
dnn/src/fallback/conv_bias/opr_impl.cpp
dnn/src/fallback/conv_bias/opr_impl.cpp
+9
-12
dnn/src/fallback/conv_bias/opr_impl.h
dnn/src/fallback/conv_bias/opr_impl.h
+7
-9
dnn/src/fallback/convolution/opr_impl.cpp
dnn/src/fallback/convolution/opr_impl.cpp
+19
-24
dnn/src/fallback/convolution/opr_impl.h
dnn/src/fallback/convolution/opr_impl.h
+14
-16
dnn/src/fallback/matrix_mul/opr_impl.cpp
dnn/src/fallback/matrix_mul/opr_impl.cpp
+13
-11
dnn/src/fallback/matrix_mul/opr_impl.h
dnn/src/fallback/matrix_mul/opr_impl.h
+5
-6
dnn/src/naive/batch_conv_bias/opr_impl.cpp
dnn/src/naive/batch_conv_bias/opr_impl.cpp
+6
-8
dnn/src/naive/batch_conv_bias/opr_impl.h
dnn/src/naive/batch_conv_bias/opr_impl.h
+1
-1
dnn/src/naive/batched_matrix_mul/opr_impl.cpp
dnn/src/naive/batched_matrix_mul/opr_impl.cpp
+1
-1
dnn/src/naive/batched_matrix_mul/opr_impl.h
dnn/src/naive/batched_matrix_mul/opr_impl.h
+1
-1
dnn/src/naive/conv_bias/opr_impl.cpp
dnn/src/naive/conv_bias/opr_impl.cpp
+6
-8
dnn/src/naive/conv_bias/opr_impl.h
dnn/src/naive/conv_bias/opr_impl.h
+1
-1
dnn/src/naive/convolution/convolution.cpp
dnn/src/naive/convolution/convolution.cpp
+18
-24
dnn/src/naive/convolution/opr_impl.h
dnn/src/naive/convolution/opr_impl.h
+3
-3
dnn/src/naive/convolution3d/convolution3d.cpp
dnn/src/naive/convolution3d/convolution3d.cpp
+18
-24
dnn/src/naive/convolution3d/opr_impl.h
dnn/src/naive/convolution3d/opr_impl.h
+3
-3
dnn/src/naive/deformable_conv/opr_impl.h
dnn/src/naive/deformable_conv/opr_impl.h
+3
-3
dnn/src/naive/local_share/opr_impl.cpp
dnn/src/naive/local_share/opr_impl.cpp
+18
-24
dnn/src/naive/local_share/opr_impl.h
dnn/src/naive/local_share/opr_impl.h
+3
-3
dnn/src/naive/matrix_mul/opr_impl.cpp
dnn/src/naive/matrix_mul/opr_impl.cpp
+1
-1
dnn/src/naive/matrix_mul/opr_impl.h
dnn/src/naive/matrix_mul/opr_impl.h
+1
-1
dnn/src/rocm/batched_matrix_mul/algos.h
dnn/src/rocm/batched_matrix_mul/algos.h
+4
-5
dnn/src/rocm/batched_matrix_mul/opr_impl.cpp
dnn/src/rocm/batched_matrix_mul/opr_impl.cpp
+6
-6
dnn/src/rocm/batched_matrix_mul/opr_impl.h
dnn/src/rocm/batched_matrix_mul/opr_impl.h
+1
-1
dnn/src/rocm/convolution/backward_data/algo.h
dnn/src/rocm/convolution/backward_data/algo.h
+9
-14
dnn/src/rocm/convolution/backward_filter/algo.h
dnn/src/rocm/convolution/backward_filter/algo.h
+9
-14
dnn/src/rocm/convolution/forward/algo.h
dnn/src/rocm/convolution/forward/algo.h
+9
-14
dnn/src/rocm/convolution/opr_impl.cpp
dnn/src/rocm/convolution/opr_impl.cpp
+36
-37
dnn/src/rocm/convolution/opr_impl.h
dnn/src/rocm/convolution/opr_impl.h
+12
-12
dnn/src/rocm/matrix_mul/algos.h
dnn/src/rocm/matrix_mul/algos.h
+4
-5
dnn/src/rocm/matrix_mul/opr_impl.cpp
dnn/src/rocm/matrix_mul/opr_impl.cpp
+6
-6
dnn/src/rocm/matrix_mul/opr_impl.h
dnn/src/rocm/matrix_mul/opr_impl.h
+1
-1
src/opr/impl/search_policy/algo_chooser.cpp
src/opr/impl/search_policy/algo_chooser.cpp
+37
-25
src/opr/test/dnn/convolution.cpp
src/opr/test/dnn/convolution.cpp
+2
-2
未找到文件。
dnn/include/megdnn/oprs/base.h
浏览文件 @
ec1a99ac
...
...
@@ -105,6 +105,10 @@ public:
*
*/
enum
class
Attribute
:
uint32_t
{
/**
* \brief general algo.
*/
DEFAULT
=
0
,
/**
* \brief whether the execution result is
...
...
@@ -163,6 +167,8 @@ public:
bool
contain_attribute
(
const
Attribute
&
attr
)
const
;
static
std
::
string
attribute_str
(
const
Attribute
&
attr
);
Handle
::
HandleType
handle_type
()
const
{
return
m_handle_type
;
}
Info
info
()
const
{
return
{{
handle_type
(),
type
(),
param
()},
name
(),
attribute
()};
...
...
@@ -311,6 +317,7 @@ class MultiAlgoOpr<Opr, 3> : public MultiAlgoOpr<Opr, -1> {
public:
using
Algorithm
=
detail
::
Algorithm
;
using
AlgorithmInfo
=
detail
::
Algorithm
::
Info
;
using
AlgoAttribute
=
detail
::
Algorithm
::
Attribute
;
//! get all possible algorithm decriptions for the specified layouts
std
::
vector
<
AlgorithmInfo
>
get_all_algorithms_info
(
const
TensorLayout
&
p0
,
...
...
@@ -335,9 +342,9 @@ public:
const
TensorLayout
&
p2
,
size_t
workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
(),
bool
reproducible
=
false
)
{
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
DEFAULT
)
{
return
get_algorithm_heuristic
(
p0
,
p1
,
p2
,
workspace_limit_in_bytes
,
reproducible
)
attr
)
->
info
();
}
...
...
@@ -360,7 +367,7 @@ protected:
const
TensorLayout
&
p2
,
size_t
workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
(),
bool
reproducible
=
false
)
=
0
;
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
DEFAULT
)
=
0
;
};
//! specializae for nargs == 4
...
...
@@ -369,6 +376,7 @@ class MultiAlgoOpr<Opr, 4> : public MultiAlgoOpr<Opr, -1> {
public:
using
Algorithm
=
detail
::
Algorithm
;
using
AlgorithmInfo
=
detail
::
Algorithm
::
Info
;
using
AlgoAttribute
=
detail
::
Algorithm
::
Attribute
;
//! get all possible algorithm decriptions for the specified layouts
std
::
vector
<
AlgorithmInfo
>
get_all_algorithms_info
(
const
TensorLayout
&
p0
,
...
...
@@ -394,9 +402,9 @@ public:
const
TensorLayout
&
p2
,
const
TensorLayout
&
p3
,
size_t
workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
(),
bool
reproducible
=
false
)
{
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
DEFAULT
)
{
return
get_algorithm_heuristic
(
p0
,
p1
,
p2
,
p3
,
workspace_limit_in_bytes
,
reproducible
)
attr
)
->
info
();
}
...
...
@@ -419,7 +427,7 @@ protected:
const
TensorLayout
&
p2
,
const
TensorLayout
&
p3
,
size_t
workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
(),
bool
reproducible
=
false
)
=
0
;
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
DEFAULT
)
=
0
;
};
//! specializae for nargs == 5
...
...
@@ -428,6 +436,7 @@ class MultiAlgoOpr<Opr, 5> : public MultiAlgoOpr<Opr, -1> {
public:
using
Algorithm
=
detail
::
Algorithm
;
using
AlgorithmInfo
=
detail
::
Algorithm
::
Info
;
using
AlgoAttribute
=
detail
::
Algorithm
::
Attribute
;
//! get all possible algorithm decriptions for the specified layouts
std
::
vector
<
AlgorithmInfo
>
get_all_algorithms_info
(
const
TensorLayout
&
p0
,
...
...
@@ -455,9 +464,9 @@ public:
const
TensorLayout
&
p4
,
size_t
workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
(),
bool
reproducible
=
false
)
{
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
DEFAULT
)
{
return
get_algorithm_heuristic
(
p0
,
p1
,
p2
,
p3
,
p4
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
...
...
@@ -482,7 +491,7 @@ protected:
const
TensorLayout
&
p4
,
size_t
workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
(),
bool
reproducible
=
false
)
=
0
;
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
DEFAULT
)
=
0
;
};
//! specializae for nargs == 8
...
...
@@ -491,6 +500,7 @@ class MultiAlgoOpr<Opr, 8> : public MultiAlgoOpr<Opr, -1> {
public:
using
Algorithm
=
detail
::
Algorithm
;
using
AlgorithmInfo
=
detail
::
Algorithm
::
Info
;
using
AlgoAttribute
=
detail
::
Algorithm
::
Attribute
;
//! get all possible algorithm decriptions for the specified layouts
std
::
vector
<
AlgorithmInfo
>
get_all_algorithms_info
(
...
...
@@ -518,9 +528,9 @@ public:
const
TensorLayout
&
p6
,
const
TensorLayout
&
p7
,
size_t
workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
(),
bool
reproducible
=
false
)
{
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
DEFAULT
)
{
return
get_algorithm_heuristic
(
p0
,
p1
,
p2
,
p3
,
p4
,
p5
,
p6
,
p7
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
...
...
@@ -547,7 +557,7 @@ protected:
const
TensorLayout
&
p6
,
const
TensorLayout
&
p7
,
size_t
workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
(),
bool
reproducible
=
false
)
=
0
;
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
DEFAULT
)
=
0
;
};
}
// namespace detail
...
...
dnn/src/common/algo_base.cpp
浏览文件 @
ec1a99ac
...
...
@@ -15,8 +15,39 @@
using
namespace
megdnn
;
#define FOREACH_ALGO_ATTRIBUTE(cb) \
cb(DEFAULT) \
cb(REPRODUCIBLE) \
cb(NAIVE)
namespace
{
inline
const
char
*
attr_str
(
const
AlgoAttribute
&
attr
)
{
#define cb(attr) \
case AlgoAttribute::attr: \
return #attr;
switch
(
attr
)
{
FOREACH_ALGO_ATTRIBUTE
(
cb
)
}
#undef cb
return
"unknown arch"
;
}
}
// namespace
std
::
string
Algorithm
::
attribute_str
(
const
Attribute
&
attr
)
{
std
::
string
ret
;
uint32_t
attr_val
=
static_cast
<
uint32_t
>
(
attr
);
while
(
attr_val
)
{
uint32_t
mask
=
~
(
attr_val
&
(
attr_val
-
1
));
Attribute
sub_attr
=
static_cast
<
Attribute
>
(
mask
&
attr_val
);
if
(
!
ret
.
empty
())
{
ret
.
append
(
" | "
);
}
ret
.
append
(
attr_str
(
sub_attr
));
attr_val
=
attr_val
&
(
attr_val
-
1
);
}
return
ret
;
}
bool
Algorithm
::
contain_attribute
(
const
Attribute
&
attr
)
const
{
return
bool
(
attribute
()
&
attr
);
return
attr
==
static_cast
<
Attribute
>
(
attribute
()
&
attr
);
}
// vim: syntax=cpp.doxygen
dnn/src/common/algo_chooser.h
浏览文件 @
ec1a99ac
...
...
@@ -32,7 +32,7 @@ typename Opr::AlgoBase* get_algorithm(Opr* opr, Args&&... args) {
}
else
{
ret
=
opr
->
get_algorithm_info_heuristic
(
std
::
forward
<
Args
>
(
args
)...,
std
::
numeric_limits
<
size_t
>::
max
(),
false
).
desc
;
AlgoAttribute
::
DEFAULT
).
desc
;
}
return
static_cast
<
typename
Opr
::
AlgoBase
*>
(
opr
->
get_algorithm_from_desc
(
ret
));
...
...
@@ -51,7 +51,7 @@ typename Opr::AlgoBase* get_algorithm_or_construct(Opr* opr, Args&&... args) {
return
static_cast
<
typename
Opr
::
AlgoBase
*>
(
opr
->
get_algorithm_heuristic
(
std
::
forward
<
Args
>
(
args
)...,
std
::
numeric_limits
<
size_t
>::
max
(),
false
));
AlgoAttribute
::
DEFAULT
));
}
}
...
...
@@ -74,37 +74,34 @@ std::vector<typename Opr::Algorithm*> get_all_algorithms(
}
/*!
* \brief a helper function to get a
reproducible algorithm
. If require a
*
reproducible algorithm, and the given algorithm is reproducible, return the
* given algorithm. Otherwise return nullptr
* \brief a helper function to get a
n algorithm with attribute
. If require a
*
algorithm with specified attribute, and the given algorithm has that
*
attribute, return the
given algorithm. Otherwise return nullptr
*/
template
<
typename
Opr
>
typename
Opr
::
Algorithm
*
get_reproducible_algo
(
typename
Opr
::
AlgoBase
*
algo
,
bool
reproducible
)
{
if
(
reproducible
)
{
if
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
{
return
algo
;
}
}
else
{
typename
Opr
::
Algorithm
*
get_algo_with_attribute
(
typename
Opr
::
AlgoBase
*
algo
,
const
AlgoAttribute
&
attr
)
{
if
(
algo
->
contain_attribute
(
attr
))
{
return
algo
;
}
return
nullptr
;
}
template
<
typename
Opr
>
typename
Opr
::
Algorithm
*
get_
reproducible_algo
(
typename
Opr
::
Algorithm
*
get_
algo_with_attribute
(
const
std
::
vector
<
typename
Opr
::
AlgoBase
*>&
algos
,
const
typename
Opr
::
AlgoBase
::
SizeArgs
&
args
,
size_t
workspace_limit_in_bytes
,
const
char
*
name
)
{
size_t
workspace_limit_in_bytes
,
const
char
*
name
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
)
{
size_t
min_workspace_limit_in_bytes
=
std
::
numeric_limits
<
size_t
>::
max
();
bool
available_but_limited_by_workspace
=
false
;
bool
available_but_
not_reproducibl
e
=
false
;
bool
available_but_
without_attribut
e
=
false
;
for
(
auto
i
:
algos
)
{
if
(
i
->
is_available_
reproducible
(
args
,
true
,
if
(
i
->
is_available_
attribute
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
i
;
}
if
(
i
->
is_available_
reproducibl
e
(
args
))
{
if
(
i
->
is_available_
attribut
e
(
args
))
{
if
(
i
->
get_workspace_in_bytes
(
args
)
>
workspace_limit_in_bytes
)
{
available_but_limited_by_workspace
=
true
;
min_workspace_limit_in_bytes
=
...
...
@@ -113,20 +110,22 @@ typename Opr::Algorithm* get_reproducible_algo(
}
}
if
(
i
->
is_available
(
args
))
{
if
(
!
i
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
available_but_
not_reproducibl
e
=
true
;
if
(
!
i
->
contain_attribute
(
attr
))
available_but_
without_attribut
e
=
true
;
}
}
MEGDNN_MARK_USED_VAR
(
name
);
if
(
available_but_limited_by_workspace
)
{
megdnn_throw
(
ssprintf
(
"no
reproducible %s algorithm
: %s workspace limit %zu is "
"no
%s algorithm with attribute:%s
: %s workspace limit %zu is "
"less than mini workspace limit %zu"
,
name
,
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
,
name
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
,
min_workspace_limit_in_bytes
));
}
else
if
(
available_but_not_reproducible
)
{
megdnn_throw
(
ssprintf
(
"no reproducible %s algorithm"
,
name
));
}
else
if
(
available_but_without_attribute
)
{
megdnn_throw
(
ssprintf
(
"no %s algorithm with attribute:%s"
,
name
,
Algorithm
::
attribute_str
(
attr
).
c_str
()));
}
else
{
megdnn_throw
(
ssprintf
(
"no usable %s algorithm"
,
name
));
}
...
...
dnn/src/cuda/batch_conv_bias/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -65,12 +65,11 @@ public:
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
...
...
dnn/src/cuda/batch_conv_bias/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -22,21 +22,21 @@ BatchConvBiasForwardImpl::get_algorithm_heuristic(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
filter
,
bias
,
z
,
dst
);
if
(
sm_algo_pack
.
int8_nchw4_gemm_dotprod
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
int8_nchw4_gemm_dotprod
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
int8_nchw4_gemm_dotprod
;
}
if
(
sm_algo_pack
.
int8_nchw4_implicit_gemm_dotprod
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
int8_nchw4_implicit_gemm_dotprod
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
int8_nchw4_implicit_gemm_dotprod
;
}
megdnn_throw
(
ssprintf
(
"no %s batch conv bias algorithm with
args(%s) and "
megdnn_throw
(
ssprintf
(
"no batch conv bias algorithm with attribute%s
args(%s) and "
"workspace limit (%zu bytes)"
,
reproducible
?
"reproducible"
:
"usable"
,
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
));
Algorithm
::
attribute_str
(
attr
).
c_str
(),
args
.
to_string
().
c_str
()
,
workspace_limit_in_bytes
));
}
std
::
vector
<
BatchConvBiasForwardImpl
::
Algorithm
*>
...
...
dnn/src/cuda/batch_conv_bias/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -48,7 +48,7 @@ protected:
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
dnn/src/cuda/batched_matrix_mul/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -68,12 +68,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/batched_matrix_mul/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -55,24 +55,21 @@ std::vector<Algorithm*> BatchedMatrixMulForwardImpl::get_all_algorithms(
Algorithm
*
BatchedMatrixMulForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
A
,
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
MEGDNN_MARK_USED_VAR
(
workspace_limit_in_bytes
);
AlgoBase
::
SizeArgs
args
(
this
,
A
,
B
,
C
);
if
(
sm_algo_pack
.
cublas
.
is_available_
reproducible
(
args
,
reproducible
))
{
if
(
sm_algo_pack
.
cublas
.
is_available_
attribute
(
args
,
attr
))
{
return
&
sm_algo_pack
.
cublas
;
}
#if CUDA_VERSION >= 10010
else
if
(
sm_algo_pack
.
cublasLt
.
is_available_reproducible
(
args
,
reproducible
))
{
else
if
(
sm_algo_pack
.
cublasLt
.
is_available_attribute
(
args
,
attr
))
{
return
&
sm_algo_pack
.
cublasLt
;
}
#endif
else
if
(
sm_algo_pack
.
int8x8x32
.
is_available_reproducible
(
args
,
reproducible
))
{
else
if
(
sm_algo_pack
.
int8x8x32
.
is_available_attribute
(
args
,
attr
))
{
return
&
sm_algo_pack
.
int8x8x32
;
}
else
{
if
(
sm_algo_pack
.
brute_force
.
is_available_reproducible
(
args
,
reproducible
))
{
if
(
sm_algo_pack
.
brute_force
.
is_available_attribute
(
args
,
attr
))
{
return
&
sm_algo_pack
.
brute_force
;
}
}
...
...
dnn/src/cuda/batched_matrix_mul/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -49,7 +49,7 @@ protected:
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
dnn/src/cuda/conv_bias/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -127,12 +127,11 @@ public:
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
...
...
dnn/src/cuda/conv_bias/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -51,7 +51,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
using
namespace
conv_bias
;
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
bias
,
z
,
dst
};
auto
dst_layout
=
*
args
.
dst_layout
;
...
...
@@ -74,7 +74,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
};
auto
get_cudnn_algo
=
[
this
,
&
conv_args
,
&
args
,
workspace_limit_in_bytes
,
reproducible
](
[
this
,
&
conv_args
,
&
args
,
workspace_limit_in_bytes
,
attr
](
const
thin_function
<
AlgoBase
*
(
cudnnConvolutionFwdAlgo_t
)
>&
cb
)
->
AlgoBase
*
{
auto
cudnn_handle
=
cuda
::
cudnn_handle
(
this
->
handle
());
...
...
@@ -92,8 +92,8 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
&
ret_count
,
algo_perf
.
data
()));
for
(
int
i
=
0
;
i
<
ret_count
;
++
i
)
{
auto
conv_bias_algo
=
cb
(
algo_perf
[
i
].
algo
);
if
(
conv_bias_algo
->
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
if
(
conv_bias_algo
->
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
return
conv_bias_algo
;
}
#else
...
...
@@ -105,7 +105,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
workspace_limit_in_bytes
,
&
algo
));
auto
conv_bias_algo
=
cb
(
algo
);
if
(
conv_bias_algo
->
is_available_
reproducible
(
args
,
reproducible
,
if
(
conv_bias_algo
->
is_available_
attribute
(
args
,
attr
,
workspace_limit_in_bytes
))
return
conv_bias_algo
;
#endif
...
...
@@ -113,10 +113,10 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
};
auto
get_1x1_algo
=
[
workspace_limit_in_bytes
,
reproducible
](
const
AlgoBase
::
SizeArgs
&
size_arg
)
attr
](
const
AlgoBase
::
SizeArgs
&
size_arg
)
->
ConvBiasForwardImpl
::
AlgoBase
*
{
if
(
sm_algo_pack
.
batched_matmul
.
is_available_
reproducibl
e
(
size_arg
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
batched_matmul
.
is_available_
attribut
e
(
size_arg
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
batched_matmul
;
}
return
nullptr
;
...
...
@@ -144,11 +144,11 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
//! avoid bad case in cudnn, check dnn chanwise impl first
if
(
is_chanwise
)
{
if
(
prefer_dnn_chanwise
)
{
if
(
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
if
(
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
return
&
sm_algo_pack
.
chanwise
;
if
(
sm_algo_pack
.
chanwise8x8x32
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
if
(
sm_algo_pack
.
chanwise8x8x32
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
return
&
sm_algo_pack
.
chanwise8x8x32
;
}
else
{
conv_args
.
dst_layout
=
&
dst_layout
;
...
...
@@ -163,8 +163,7 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
//! Prefer CUDNN CONVBIAS.
bool
cudnn_conv_bias_act_supported
=
false
;
for
(
auto
&&
algo
:
sm_algo_pack
.
cudnn_conv_bias_activations
)
{
if
(
algo
.
is_available_reproducible
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
algo
.
is_available_attribute
(
args
,
attr
,
workspace_limit_in_bytes
))
{
cudnn_conv_bias_act_supported
=
true
;
break
;
}
...
...
@@ -201,26 +200,26 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
return
algo
;
}
if
(
sm_algo_pack
.
fallback_nchw_qs8
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
fallback_nchw_qs8
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
fallback_nchw_qs8
;
}
if
(
args
.
src_layout
->
dtype
.
enumv
()
!=
DTypeTrait
<
dtype
::
BFloat16
>::
enumv
)
{
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
ConvBiasForwardImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
ConvBiasForwardImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda convbias fwd"
);
workspace_limit_in_bytes
,
"cuda convbias fwd"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvBiasForwardImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda convbias fwd"
);
}
}
else
{
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
ConvBiasForwardImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
ConvBiasForwardImpl
>
(
sm_algo_pack
.
bfloat16_algos
,
args
,
workspace_limit_in_bytes
,
"cuda convbias fwd"
);
"cuda convbias fwd"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvBiasForwardImpl
>
(
sm_algo_pack
.
bfloat16_algos
,
args
,
workspace_limit_in_bytes
,
...
...
dnn/src/cuda/conv_bias/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -82,7 +82,7 @@ public:
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
dnn/src/cuda/convolution/backward_data/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -82,12 +82,11 @@ public:
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
...
...
dnn/src/cuda/convolution/backward_filter/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -78,12 +78,11 @@ public:
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
...
...
dnn/src/cuda/convolution/forward/algos.h
浏览文件 @
ec1a99ac
...
...
@@ -63,13 +63,13 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
const
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
const
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
auto
req
=
get_workspace_in_bytes
(
args
);
...
...
dnn/src/cuda/convolution/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -12,6 +12,7 @@
#include "src/cuda/convolution/opr_impl.h"
#include "megdnn/dtype.h"
#include "src/common/algo_chooser.h"
#include "src/cuda/convolution/helper.h"
#include "src/cuda/convolution/forward/algos.h"
#include "src/cuda/convolution/backward_data/algo.h"
...
...
@@ -36,10 +37,10 @@ ConvolutionForwardImpl::get_algorithm_heuristic(const TensorLayout& src,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
{
this
,
src
,
filter
,
dst
};
MEGDNN_MARK_USED_VAR
(
workspace_limit_in_bytes
);
MEGDNN_MARK_USED_VAR
(
reproducible
);
MEGDNN_MARK_USED_VAR
(
attr
);
return
&
sm_algo_pack
.
algo_default
;
}
...
...
@@ -100,32 +101,32 @@ ConvolutionBackwardDataImpl::Algorithm*
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
check_layout_fwd
(
grad
,
filter
,
diff
);
return
get_algorithm_heuristic
(
filter
,
fm
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
);
workspace_limit_in_bytes
,
attr
);
}
ConvolutionBackwardDataImpl
::
Algorithm
*
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
filter_meta
,
diff
,
grad
);
if
(
args
.
filter_meta
.
group
>
1
&&
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
// prefer special chanwise impl
return
&
sm_algo_pack
.
chanwise
;
}
if
(
args
.
filter_layout
->
dtype
.
enumv
()
==
DTypeTrait
<
dtype
::
QuantizedS8
>::
enumv
)
{
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardDataImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
int8_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv bwd_data"
);
"cuda conv bwd_data"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
int8_algos
,
args
,
workspace_limit_in_bytes
,
...
...
@@ -133,9 +134,8 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(
}
}
auto
get_cudnn_algo
=
[
this
,
&
args
,
workspace_limit_in_bytes
,
reproducible
]()
->
ConvolutionBackwardDataImpl
::
AlgoBase
*
{
auto
get_cudnn_algo
=
[
this
,
&
args
,
workspace_limit_in_bytes
,
attr
]()
->
ConvolutionBackwardDataImpl
::
AlgoBase
*
{
auto
cudnn_handle
=
cuda
::
cudnn_handle
(
this
->
handle
());
CUDNNBwdDataDescs
desc
;
args
.
init_desc
(
desc
);
...
...
@@ -153,7 +153,7 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(
for
(
int
i
=
0
;
i
<
ret_count
;
++
i
)
{
if
(
algo_perf
[
i
].
memory
>
workspace_limit_in_bytes
)
continue
;
if
(
reproducible
)
{
if
(
attr
&
AlgoAttribute
::
REPRODUCIBLE
)
{
if
(
algo_perf
[
i
].
determinism
==
CUDNN_DETERMINISTIC
)
{
return
reinterpret_cast
<
AlgoBase
*>
(
sm_algo_pack
.
cudnn_from_enum
(
algo_perf
[
i
].
algo
));
...
...
@@ -174,8 +174,8 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(
auto
&&
cast_algo
=
reinterpret_cast
<
AlgoBase
*>
(
sm_algo_pack
.
cudnn_from_enum
(
algo
));
return
reinterpret_cast
<
AlgoBase
*>
(
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardDataImpl
>
(
cast_algo
,
reproducible
));
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardDataImpl
>
(
cast_algo
,
attr
));
#endif
};
...
...
@@ -197,20 +197,20 @@ ConvolutionBackwardDataImpl::get_algorithm_heuristic(
if
(
args
.
filter_layout
->
dtype
.
enumv
()
!=
DTypeTrait
<
dtype
::
BFloat16
>::
enumv
)
{
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardDataImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv bwd_data"
);
workspace_limit_in_bytes
,
"cuda conv bwd_data"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv bwd_data"
);
}
}
else
{
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardDataImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
bfloat16_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv bwd_data"
);
"cuda conv bwd_data"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
bfloat16_algos
,
args
,
workspace_limit_in_bytes
,
...
...
@@ -255,29 +255,29 @@ ConvolutionBackwardFilterImpl::Algorithm*
ConvolutionBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
check_layout_fwd
(
src
,
grad
,
diff
);
return
get_algorithm_heuristic
(
src
,
diff
,
grad
,
fm
,
workspace_limit_in_bytes
,
reproducible
);
workspace_limit_in_bytes
,
attr
);
}
ConvolutionBackwardFilterImpl
::
Algorithm
*
ConvolutionBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
,
grad_meta
);
if
(
args
.
grad_filter_meta
.
group
>
1
&&
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
// prefer special chanwise impl
return
&
sm_algo_pack
.
chanwise
;
}
auto
get_cudnn_algo
=
[
this
,
&
args
,
workspace_limit_in_bytes
,
reproducible
]()
->
ConvolutionBackwardFilterImpl
::
AlgoBase
*
{
attr
]()
->
ConvolutionBackwardFilterImpl
::
AlgoBase
*
{
auto
cudnn_handle
=
cuda
::
cudnn_handle
(
this
->
handle
());
CUDNNBwdFilterDescs
desc
;
args
.
init_desc
(
desc
);
...
...
@@ -305,7 +305,7 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
for
(
int
i
=
0
;
i
<
ret_count
;
++
i
)
{
if
(
algo_perf
[
i
].
memory
>
workspace_limit_in_bytes
)
continue
;
if
(
reproducible
)
{
if
(
attr
&
AlgoAttribute
::
REPRODUCIBLE
)
{
if
(
algo_perf
[
i
].
determinism
==
CUDNN_DETERMINISTIC
)
{
return
reinterpret_cast
<
AlgoBase
*>
(
sm_algo_pack
.
cudnn_from_enum
(
algo_perf
[
i
].
algo
));
...
...
@@ -326,8 +326,8 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
auto
&&
cast_algo
=
reinterpret_cast
<
AlgoBase
*>
(
sm_algo_pack
.
cudnn_from_enum
(
algo
));
return
reinterpret_cast
<
AlgoBase
*>
(
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardFilterImpl
>
(
cast_algo
,
reproducible
));
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardFilterImpl
>
(
cast_algo
,
attr
));
#endif
};
...
...
@@ -348,20 +348,22 @@ ConvolutionBackwardFilterImpl::get_algorithm_heuristic(
}
if
(
args
.
src_layout
->
dtype
.
enumv
()
!=
DTypeTrait
<
dtype
::
BFloat16
>::
enumv
)
{
if
(
reproducible
)
{
return
megdnn
::
get_reproducible_algo
<
ConvolutionBackwardFilterImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_algo_with_attribute
<
ConvolutionBackwardFilterImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv bwd_filter"
);
workspace_limit_in_bytes
,
"cuda conv bwd_filter"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvolutionBackwardFilterImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv bwd_filter"
);
}
}
else
{
if
(
reproducible
)
{
return
megdnn
::
get_reproducible_algo
<
ConvolutionBackwardFilterImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_algo_with_attribute
<
ConvolutionBackwardFilterImpl
>
(
sm_algo_pack
.
bfloat16_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv bwd_filter"
);
"cuda conv bwd_filter"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvolutionBackwardFilterImpl
>
(
sm_algo_pack
.
bfloat16_algos
,
args
,
workspace_limit_in_bytes
,
...
...
dnn/src/cuda/convolution/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -63,7 +63,7 @@ protected:
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
@@ -77,9 +77,9 @@ public:
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
filter
,
const
CanonizedFilterMeta
&
filter_meta
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
filter
,
filter_meta
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
...
...
@@ -87,9 +87,9 @@ public:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
filter
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
...
...
@@ -122,7 +122,7 @@ protected:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
...
...
@@ -130,7 +130,7 @@ private:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
@@ -146,9 +146,9 @@ public:
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
src
,
diff
,
grad
,
grad_meta
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
...
...
@@ -156,9 +156,9 @@ public:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
filter
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
...
...
@@ -185,7 +185,7 @@ protected:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
...
...
@@ -193,7 +193,7 @@ private:
const
TensorLayout
&
grad
,
const
CanonizedFilterMeta
&
grad_meta
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
dnn/src/cuda/convolution3d/backward_data/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -75,12 +75,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/convolution3d/backward_filter/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -69,12 +69,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/convolution3d/forward/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -74,12 +74,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/convolution3d/helper.h
浏览文件 @
ec1a99ac
...
...
@@ -97,8 +97,8 @@ namespace convolution3d {
const
cudnnConvolutionDescriptor_t
conv_desc
,
const
cudnnTensorDescriptor_t
y_desc
,
size_t
workspace_limit_in_bytes
,
cudnnConvolutionFwdAlgo_t
*
algo
,
bool
reproducible
)
{
MEGDNN_MARK_USED_VAR
(
reproducible
);
const
AlgoAttribute
&
attr
)
{
MEGDNN_MARK_USED_VAR
(
attr
);
#if CUDNN_MAJOR >= 7
int
algo_max_count
=
0
;
cudnn_check
(
cudnnGetConvolutionForwardAlgorithmMaxCount
(
...
...
@@ -118,7 +118,7 @@ namespace convolution3d {
cudnn_handle
,
x_desc
,
w_desc
,
conv_desc
,
y_desc
,
algo_perf
[
i
].
algo
,
&
workspace_size
));
if
(
workspace_size
>
workspace_limit_in_bytes
)
continue
;
if
(
!
reproducible
)
{
if
(
!
(
attr
&
AlgoAttribute
::
REPRODUCIBLE
)
)
{
*
algo
=
algo_perf
[
i
].
algo
;
return
true
;
}
else
{
...
...
@@ -144,8 +144,8 @@ namespace convolution3d {
const
cudnnConvolutionDescriptor_t
conv_desc
,
const
cudnnTensorDescriptor_t
dx_desc
,
size_t
workspace_limit_in_bytes
,
cudnnConvolutionBwdDataAlgo_t
*
algo
,
bool
reproducible
)
{
MEGDNN_MARK_USED_VAR
(
reproducible
);
cudnnConvolutionBwdDataAlgo_t
*
algo
,
const
AlgoAttribute
&
attr
)
{
MEGDNN_MARK_USED_VAR
(
attr
);
#if CUDNN_MAJOR >= 7
int
algo_max_count
=
0
;
cudnn_check
(
cudnnGetConvolutionBackwardDataAlgorithmMaxCount
(
...
...
@@ -166,7 +166,7 @@ namespace convolution3d {
cudnn_handle
,
w_desc
,
dy_desc
,
conv_desc
,
dx_desc
,
algo_perf
[
i
].
algo
,
&
workspace_size
));
if
(
workspace_size
>
workspace_limit_in_bytes
)
continue
;
if
(
!
reproducible
)
{
if
(
!
(
attr
&
AlgoAttribute
::
REPRODUCIBLE
)
)
{
*
algo
=
algo_perf
[
i
].
algo
;
return
true
;
}
else
{
...
...
@@ -193,8 +193,8 @@ namespace convolution3d {
const
cudnnConvolutionDescriptor_t
conv_desc
,
const
cudnnFilterDescriptor_t
dw_desc
,
size_t
workspace_limit_in_bytes
,
cudnnConvolutionBwdFilterAlgo_t
*
algo
,
bool
reproducible
)
{
MEGDNN_MARK_USED_VAR
(
reproducible
);
cudnnConvolutionBwdFilterAlgo_t
*
algo
,
const
AlgoAttribute
&
attr
)
{
MEGDNN_MARK_USED_VAR
(
attr
);
#if CUDNN_MAJOR >= 7
int
algo_max_count
=
0
;
cudnn_check
(
cudnnGetConvolutionBackwardFilterAlgorithmMaxCount
(
...
...
@@ -207,14 +207,15 @@ namespace convolution3d {
algo_max_count
,
&
algo_count
,
algo_perf
.
data
()));
for
(
int
i
=
0
;
i
<
algo_count
;
++
i
)
{
if
(
algo_perf
[
i
].
algo
==
cudnnConvolutionBwdFilterAlgo_t
::
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING
)
cudnnConvolutionBwdFilterAlgo_t
::
CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING
)
continue
;
size_t
workspace_size
=
0
;
cudnn_check
(
cudnnGetConvolutionBackwardFilterWorkspaceSize
(
cudnn_handle
,
x_desc
,
dy_desc
,
conv_desc
,
dw_desc
,
algo_perf
[
i
].
algo
,
&
workspace_size
));
if
(
workspace_size
>
workspace_limit_in_bytes
)
continue
;
if
(
!
reproducible
)
{
if
(
!
(
attr
&
AlgoAttribute
::
REPRODUCIBLE
)
)
{
*
algo
=
algo_perf
[
i
].
algo
;
return
true
;
}
else
{
...
...
dnn/src/cuda/convolution3d/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -15,6 +15,7 @@
#include "./forward/algo.h"
#include "./helper.h"
#include "src/common/algo_chooser.h"
#include "src/cuda/utils.h"
using
namespace
megdnn
;
...
...
@@ -32,16 +33,16 @@ Convolution3DForwardImpl::Algorithm*
Convolution3DForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
check_layout_fwd
(
src
,
filter
,
dst
);
return
get_algorithm_heuristic
(
src
,
fm
,
dst
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
Convolution3DForwardImpl
::
Algorithm
*
Convolution3DForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
filter
,
dst
);
#if CUDNN_MAJOR < 7 || (CUDNN_MAJOR == 7 && CUDNN_MINOR < 5)
...
...
@@ -49,26 +50,26 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
// prefer special chanwise impl since as the group conv of cudnn whose
// version is lower than v7.5.0 is still slower than our implementation
// in many channel-wise cases
if
(
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
chanwise
;
}
}
#endif
auto
prefer_1x1x1
=
[
&
args
,
reproducible
,
workspace_limit_in_bytes
]()
{
auto
prefer_1x1x1
=
[
&
args
,
attr
,
workspace_limit_in_bytes
]()
{
const
size_t
MAX_BATCH_SIZE_FOR_1x1x1_MAT_ALGO
=
4
;
size_t
batch_size
=
args
.
src_layout
->
shape
[
0
];
if
(
batch_size
>
MAX_BATCH_SIZE_FOR_1x1x1_MAT_ALGO
)
{
return
false
;
}
return
sm_algo_pack
.
a1x1x1
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
);
return
sm_algo_pack
.
a1x1x1
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
);
};
auto
get_cudnn_algo
=
[
this
,
&
args
,
workspace_limit_in_bytes
,
reproducible
]()
->
Convolution3DForwardImpl
::
AlgoBase
*
{
attr
]()
->
Convolution3DForwardImpl
::
AlgoBase
*
{
auto
cudnn_handle
=
cuda
::
cudnn_handle
(
this
->
handle
());
cudnnConvolutionFwdAlgo_t
algo
;
CUDNNForwardDescs
desc
;
...
...
@@ -77,11 +78,11 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
bool
got
=
cudnn_get_convolution_fwd_algo_helper
(
cudnn_handle
,
desc
.
src_desc
.
desc
,
desc
.
filter_desc
.
desc
,
desc
.
conv_desc
.
desc
,
desc
.
dst_desc
.
desc
,
workspace_limit_in_bytes
,
&
algo
,
reproducible
);
workspace_limit_in_bytes
,
&
algo
,
attr
);
if
(
got
)
{
return
static_cast
<
AlgoBase
*>
(
megdnn
::
get_
reproducible_algo
<
Convolution3DForwardImpl
>
(
sm_algo_pack
.
cudnn_from_enum
(
algo
),
reproducible
));
megdnn
::
get_
algo_with_attribute
<
Convolution3DForwardImpl
>
(
sm_algo_pack
.
cudnn_from_enum
(
algo
),
attr
));
}
else
{
return
nullptr
;
}
...
...
@@ -107,10 +108,10 @@ Convolution3DForwardImpl::get_algorithm_heuristic(
args
=
orig_args
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
Convolution3DForwardImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
Convolution3DForwardImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv3d fwd"
);
"cuda conv3d fwd"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
Convolution3DForwardImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
...
...
@@ -168,28 +169,28 @@ Convolution3DBackwardDataImpl::Algorithm*
Convolution3DBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
check_layout_fwd
(
grad
,
filter
,
diff
);
return
get_algorithm_heuristic
(
fm
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
Convolution3DBackwardDataImpl
::
Algorithm
*
Convolution3DBackwardDataImpl
::
get_algorithm_heuristic
(
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
diff
,
grad
);
if
(
args
.
filter_meta
.
group
>
1
&&
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
chanwise
;
}
auto
get_cudnn_algo
=
[
this
,
&
args
,
workspace_limit_in_bytes
,
reproducible
]()
->
Convolution3DBackwardDataImpl
::
AlgoBase
*
{
attr
]()
->
Convolution3DBackwardDataImpl
::
AlgoBase
*
{
auto
cudnn_handle
=
cuda
::
cudnn_handle
(
this
->
handle
());
cudnnConvolutionBwdDataAlgo_t
algo
;
CUDNNBwdDataDescs
desc
;
...
...
@@ -197,11 +198,11 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
bool
got
=
cudnn_get_convolution_bwd_data_algo_helper
(
cudnn_handle
,
desc
.
filter_desc
.
desc
,
desc
.
diff_desc
.
desc
,
desc
.
conv_desc
.
desc
,
desc
.
grad_desc
.
desc
,
workspace_limit_in_bytes
,
&
algo
,
reproducible
);
workspace_limit_in_bytes
,
&
algo
,
attr
);
if
(
got
)
{
return
static_cast
<
AlgoBase
*>
(
megdnn
::
get_
reproducible_algo
<
return
static_cast
<
AlgoBase
*>
(
megdnn
::
get_
algo_with_attribute
<
Convolution3DBackwardDataImpl
>
(
sm_algo_pack
.
cudnn_from_enum
(
algo
),
reproducible
));
sm_algo_pack
.
cudnn_from_enum
(
algo
),
attr
));
}
else
{
return
nullptr
;
}
...
...
@@ -223,10 +224,10 @@ Convolution3DBackwardDataImpl::get_algorithm_heuristic(
args
=
orig_args
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
Convolution3DBackwardDataImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
Convolution3DBackwardDataImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv3d bwd data"
);
"cuda conv3d bwd data"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
Convolution3DBackwardDataImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
...
...
@@ -268,28 +269,28 @@ Convolution3DBackwardFilterImpl::Algorithm*
Convolution3DBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
check_layout_fwd
(
src
,
grad
,
diff
);
return
get_algorithm_heuristic
(
src
,
diff
,
fm
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
Convolution3DBackwardFilterImpl
::
Algorithm
*
Convolution3DBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
);
if
(
args
.
grad_filter_meta
.
group
>
1
&&
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
chanwise
;
}
auto
get_cudnn_algo
=
[
this
,
&
args
,
workspace_limit_in_bytes
,
reproducible
]()
->
Convolution3DBackwardFilterImpl
::
AlgoBase
*
{
attr
]()
->
Convolution3DBackwardFilterImpl
::
AlgoBase
*
{
auto
cudnn_handle
=
cuda
::
cudnn_handle
(
this
->
handle
());
cudnnConvolutionBwdFilterAlgo_t
algo
;
CUDNNBwdFilterDescs
desc
;
...
...
@@ -297,11 +298,11 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
bool
got
=
cudnn_get_convolution_bwd_filter_algo_helper
(
cudnn_handle
,
desc
.
src_desc
.
desc
,
desc
.
diff_desc
.
desc
,
desc
.
conv_desc
.
desc
,
desc
.
grad_desc
.
desc
,
workspace_limit_in_bytes
,
&
algo
,
reproducible
);
workspace_limit_in_bytes
,
&
algo
,
attr
);
if
(
got
)
{
return
static_cast
<
AlgoBase
*>
(
megdnn
::
get_
reproducible_algo
<
return
static_cast
<
AlgoBase
*>
(
megdnn
::
get_
algo_with_attribute
<
Convolution3DBackwardFilterImpl
>
(
sm_algo_pack
.
cudnn_from_enum
(
algo
),
reproducible
));
sm_algo_pack
.
cudnn_from_enum
(
algo
),
attr
));
}
else
{
return
nullptr
;
}
...
...
@@ -322,10 +323,10 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
args
=
orig_args
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
Convolution3DBackwardFilterImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
Convolution3DBackwardFilterImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
"cuda conv3d bwd filter"
);
"cuda conv3d bwd filter"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
Convolution3DBackwardFilterImpl
>
(
sm_algo_pack
.
non_cudnn_algos
,
args
,
workspace_limit_in_bytes
,
...
...
dnn/src/cuda/convolution3d/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -25,9 +25,9 @@ public:
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
src
,
filter
,
dst
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
...
...
@@ -52,14 +52,14 @@ protected:
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
static
AlgoPack
sm_algo_pack
;
...
...
@@ -73,9 +73,9 @@ public:
AlgorithmInfo
get_algorithm_info_heuristic
(
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
filter
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
filter
,
...
...
@@ -102,14 +102,14 @@ protected:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
@@ -126,9 +126,9 @@ public:
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
src
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
...
...
@@ -153,14 +153,14 @@ protected:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
dnn/src/cuda/deformable_conv/bwd_data/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -80,12 +80,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/deformable_conv/bwd_flt/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -73,12 +73,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/deformable_conv/fwd/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -68,12 +68,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/deformable_conv/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -59,10 +59,10 @@ AlgoFwd* Fwd::get_algorithm_heuristic(const TensorLayout& im,
const
TensorLayout
&
mask
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
make_canonized_filter_meta
(
im
.
ndim
,
filter
,
offset
);
return
get_algorithm_heuristic
(
im
,
fm
,
offset
,
mask
,
dst
,
workspace_limit_in_bytes
,
reproducible
);
workspace_limit_in_bytes
,
attr
);
}
AlgoFwd
*
Fwd
::
get_algorithm_heuristic
(
const
TensorLayout
&
im
,
...
...
@@ -71,17 +71,17 @@ AlgoFwd* Fwd::get_algorithm_heuristic(const TensorLayout& im,
const
TensorLayout
&
mask
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
im
,
filter
,
offset
,
mask
,
dst
);
if
(
sm_algo_pack
.
algo_matmul
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
algo_matmul
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
algo_matmul
;
}
megdnn_throw
(
ssprintf
(
"no %s deformable conv fwd algorithm with
args(%s) and "
megdnn_throw
(
ssprintf
(
"no deformable conv fwd algorithm with attribute%s ,
args(%s) and "
"workspace limit (%zu bytes)"
,
reproducible
?
"reproducible"
:
"usable"
,
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
));
Algorithm
::
attribute_str
(
attr
).
c_str
(),
args
.
to_string
().
c_str
()
,
workspace_limit_in_bytes
));
}
const
char
*
Fwd
::
get_algorithm_set_name
()
const
{
...
...
@@ -115,27 +115,28 @@ AlgoBwdFlt* BwdFlt::get_algorithm_heuristic(
const
TensorLayout
&
im
,
const
TensorLayout
&
offset
,
const
TensorLayout
&
mask
,
const
TensorLayout
&
out_grad
,
const
TensorLayout
&
filter_grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
auto
fm
=
make_canonized_filter_meta
(
im
.
ndim
,
filter_grad
,
offset
);
return
get_algorithm_heuristic
(
im
,
offset
,
mask
,
out_grad
,
fm
,
workspace_limit_in_bytes
,
reproducible
);
workspace_limit_in_bytes
,
attr
);
}
AlgoBwdFlt
*
BwdFlt
::
get_algorithm_heuristic
(
const
TensorLayout
&
im
,
const
TensorLayout
&
offset
,
const
TensorLayout
&
mask
,
const
TensorLayout
&
out_grad
,
const
CanonizedFilterMeta
&
filter_grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
im
,
offset
,
mask
,
out_grad
,
filter_grad
);
if
(
sm_algo_pack
.
algo_matmul
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
algo_matmul
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
algo_matmul
;
}
megdnn_throw
(
ssprintf
(
"no %s deformable conv bwd filter algorithm with args(%s) and "
megdnn_throw
(
ssprintf
(
"no deformable conv bwd filter algorithm with "
"attribute%s, args(%s) and "
"workspace limit (%zu bytes)"
,
reproducible
?
"reproducible"
:
"usable"
,
args
.
to_string
(
).
c_str
(),
workspace_limit_in_bytes
));
Algorithm
::
attribute_str
(
attr
).
c_str
(),
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
));
}
size_t
BwdFlt
::
get_workspace_in_bytes
(
...
...
@@ -175,11 +176,11 @@ AlgoBwdData* BwdData::get_algorithm_heuristic(
const
TensorLayout
&
offset
,
const
TensorLayout
&
mask
,
const
TensorLayout
&
out_grad
,
const
TensorLayout
&
im_grad
,
const
TensorLayout
&
offset_grad
,
const
TensorLayout
&
mask_grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
auto
fm
=
make_canonized_filter_meta
(
im
.
ndim
,
filter
,
offset
);
return
get_algorithm_heuristic
(
im
,
fm
,
offset
,
mask
,
out_grad
,
im_grad
,
offset_grad
,
mask_grad
,
workspace_limit_in_bytes
,
reproducible
);
workspace_limit_in_bytes
,
attr
);
}
AlgoBwdData
*
BwdData
::
get_algorithm_heuristic
(
...
...
@@ -187,18 +188,19 @@ AlgoBwdData* BwdData::get_algorithm_heuristic(
const
TensorLayout
&
offset
,
const
TensorLayout
&
mask
,
const
TensorLayout
&
out_grad
,
const
TensorLayout
&
im_grad
,
const
TensorLayout
&
offset_grad
,
const
TensorLayout
&
mask_grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
im
,
filter
,
offset
,
mask
,
out_grad
,
im_grad
,
offset_grad
,
mask_grad
);
if
(
sm_algo_pack
.
algo_matmul
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
algo_matmul
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
algo_matmul
;
}
megdnn_throw
(
ssprintf
(
"no %s deformable conv bwd data algorithm with args(%s) and "
megdnn_throw
(
ssprintf
(
"no deformable conv bwd data algorithm with attribute%s, "
"args(%s) and "
"workspace limit (%zu bytes)"
,
reproducible
?
"reproducible"
:
"usable"
,
args
.
to_string
(
).
c_str
(),
workspace_limit_in_bytes
));
Algorithm
::
attribute_str
(
attr
).
c_str
(),
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
));
}
size_t
BwdData
::
get_workspace_in_bytes
(
...
...
dnn/src/cuda/deformable_conv/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -36,7 +36,7 @@ public:
const
TensorLayout
&
mask
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
const
char
*
get_algorithm_set_name
()
const
override
;
...
...
@@ -60,7 +60,7 @@ protected:
const
TensorLayout
&
mask
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
@@ -81,7 +81,7 @@ public:
const
TensorLayout
&
out_grad
,
const
CanonizedFilterMeta
&
filter_grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
im
,
const
TensorLayout
&
offset
,
...
...
@@ -111,7 +111,7 @@ protected:
const
TensorLayout
&
out_grad
,
const
TensorLayout
&
filter_grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
@@ -132,7 +132,7 @@ public:
const
TensorLayout
&
offset
,
const
TensorLayout
&
mask
,
const
TensorLayout
&
out_grad
,
const
TensorLayout
&
im_grad
,
const
TensorLayout
&
offset_grad
,
const
TensorLayout
&
mask_grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
);
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
im
,
const
TensorLayout
&
filter
,
...
...
@@ -166,7 +166,8 @@ protected:
const
TensorLayout
&
offset
,
const
TensorLayout
&
mask
,
const
TensorLayout
&
out_grad
,
const
TensorLayout
&
im_grad
,
const
TensorLayout
&
offset_grad
,
const
TensorLayout
&
mask_grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
dnn/src/cuda/local_share/backward_data/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -59,12 +59,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/local_share/backward_filter/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -59,12 +59,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/local_share/forward/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -60,12 +60,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/local_share/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -24,26 +24,26 @@ LocalShareForwardImpl::get_algorithm_heuristic(const TensorLayout& src,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
filter
,
dst
);
if
(
sm_algo_pack
.
batch_size_aware_chwn_small_image
.
is_available_
reproducible
(
args
,
reproducible
,
.
is_available_
attribute
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
batch_size_aware_chwn_small_image
;
}
if
(
sm_algo_pack
.
batch_size_aware_chwn
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
batch_size_aware_chwn
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
batch_size_aware_chwn
;
}
if
(
sm_algo_pack
.
batched_matmul
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
batched_matmul
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
batched_matmul
;
}
megdnn_throw
(
ssprintf
(
"no %s local share conv algorithm with
args(%s) and "
megdnn_throw
(
ssprintf
(
"no local share conv algorithm with attribute%s,
args(%s) and "
"workspace limit (%zu bytes)"
,
reproducible
?
"reproducible"
:
"usable"
,
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
));
Algorithm
::
attribute_str
(
attr
).
c_str
(),
args
.
to_string
().
c_str
()
,
workspace_limit_in_bytes
));
}
std
::
vector
<
LocalShareForwardImpl
::
Algorithm
*>
...
...
@@ -79,21 +79,21 @@ LocalShareBackwardDataImpl::Algorithm*
LocalShareBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
diff
,
grad
);
if
(
sm_algo_pack
.
implicit_gemm
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
implicit_gemm
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
implicit_gemm
;
}
if
(
sm_algo_pack
.
batched_matmul
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
batched_matmul
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
batched_matmul
;
}
megdnn_throw
(
ssprintf
(
"no %s local share bwd data algorithm with
args(%s) and "
megdnn_throw
(
ssprintf
(
"no local share bwd data algorithm with attribute%s
args(%s) and "
"workspace limit (%zu bytes)"
,
reproducible
?
"reproducible"
:
"usable"
,
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
));
Algorithm
::
attribute_str
(
attr
).
c_str
(),
args
.
to_string
().
c_str
()
,
workspace_limit_in_bytes
));
}
std
::
vector
<
LocalShareBackwardDataImpl
::
Algorithm
*>
...
...
@@ -129,20 +129,21 @@ LocalShareBackwardFilterImpl::Algorithm*
LocalShareBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
);
if
(
sm_algo_pack
.
implicit_gemm
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
implicit_gemm
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
implicit_gemm
;
}
if
(
sm_algo_pack
.
batched_matmul
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
batched_matmul
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
batched_matmul
;
}
megdnn_throw
(
ssprintf
(
"no %s local share bwd filter algorithm with args(%s) and "
ssprintf
(
"no local share bwd filter algorithm with attribute%s, "
"args(%s) and "
"workspace limit (%zu bytes)"
,
reproducible
?
"reproducible"
:
"usable"
,
Algorithm
::
attribute_str
(
attr
).
c_str
()
,
args
.
to_string
().
c_str
(),
workspace_limit_in_bytes
));
}
...
...
dnn/src/cuda/local_share/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -43,7 +43,7 @@ protected:
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
};
...
...
@@ -75,7 +75,7 @@ protected:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
@@ -108,7 +108,7 @@ protected:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
dnn/src/cuda/matrix_mul/algos.h
浏览文件 @
ec1a99ac
...
...
@@ -83,12 +83,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
const
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
const
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/cuda/matrix_mul/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -30,30 +30,30 @@ MatrixMulForwardImpl::get_all_algorithms(const TensorLayout& A,
MatrixMulForwardImpl
::
Algorithm
*
MatrixMulForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
A
,
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
{
this
,
A
,
B
,
C
};
if
(
sm_algo_pack
.
cublas
.
is_available_
reproducible
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
cublas
.
is_available_
attribute
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
cublas
;
}
#if CUDA_VERSION >= 10010
if
(
sm_algo_pack
.
cublas_lt
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
cublas_lt
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
cublas_lt
;
}
#endif
#if CUDA_VERSION >= 10000
if
(
sm_algo_pack
.
wmma_uint4x4x32
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
wmma_uint4x4x32
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
wmma_uint4x4x32
;
}
#endif
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
MatrixMulForwardImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
MatrixMulForwardImpl
>
(
sm_algo_pack
.
all_algos
,
args
,
workspace_limit_in_bytes
,
"matrix mul forward"
);
"matrix mul forward"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
MatrixMulForwardImpl
>
(
sm_algo_pack
.
all_algos
,
args
,
workspace_limit_in_bytes
,
...
...
dnn/src/cuda/matrix_mul/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -61,7 +61,7 @@ protected:
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
private:
static
AlgoPack
sm_algo_pack
;
...
...
dnn/src/fallback/batched_matrix_mul/algos.h
浏览文件 @
ec1a99ac
...
...
@@ -63,12 +63,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
const
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
const
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/fallback/batched_matrix_mul/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -31,16 +31,16 @@ BatchedMatrixMulForwardImpl::get_all_algorithms(const TensorLayout& A,
BatchedMatrixMulForwardImpl
::
Algorithm
*
BatchedMatrixMulForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
A
,
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
{
this
,
A
,
B
,
C
};
if
(
sm_algo_pack
.
algo_default
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
algo_default
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
algo_default
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
BatchedMatrixMulForwardImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
BatchedMatrixMulForwardImpl
>
(
sm_algo_pack
.
all_algos
,
args
,
workspace_limit_in_bytes
,
"batched matrix mul forward"
);
"batched matrix mul forward"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
BatchedMatrixMulForwardImpl
>
(
sm_algo_pack
.
all_algos
,
args
,
workspace_limit_in_bytes
,
...
...
dnn/src/fallback/batched_matrix_mul/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -40,7 +40,7 @@ private:
const
TensorLayout
&
/*B*/
,
const
TensorLayout
&
/*C*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/*reproducible
*/
)
override
;
const
AlgoAttribute
&
/*attr
*/
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
{
return
"FALLBACK BATCHED MATMUL"
;
...
...
dnn/src/fallback/conv_bias/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -280,32 +280,29 @@ ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm_heuristic(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
bias
,
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fparam
=
make_ncb_kern_size_param
(
src
,
filter
,
bias
,
dst
,
nullptr
);
auto
result
=
get_algorithm_heuristic_with_ncb
(
fparam
,
workspace_limit_in_bytes
,
reproducible
);
fparam
,
workspace_limit_in_bytes
,
attr
);
if
(
result
==
nullptr
)
{
result
=
naive
::
ConvBiasForwardImpl
::
get_algorithm_heuristic
(
src
,
filter
,
bias
,
z
,
dst
,
workspace_limit_in_bytes
,
reproducible
);
src
,
filter
,
bias
,
z
,
dst
,
workspace_limit_in_bytes
,
attr
);
}
return
result
;
}
ConvBiasImpl
::
Algorithm
*
ConvBiasImpl
::
get_algorithm_heuristic_with_ncb
(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo_data_type
=
param
.
deduce_algo_data_type
();
auto
suggest_category_order
=
suggest_algo_category_order
(
param
);
for
(
auto
category
:
suggest_category_order
)
{
auto
&&
origin_algos
=
select_algo_type
({
algo_data_type
,
category
});
ConvBiasImpl
::
Algorithm
*
heuristic_algo
=
nullptr
;
for
(
auto
i
:
origin_algos
)
{
bool
usable_reproducible
=
static_cast
<
AlgoBase
*>
(
i
)
->
usable_reproducible
(
param
,
AlgoSelectionStrategy
::
HEURISTIC
,
reproducible
);
if
(
usable_reproducible
&&
bool
usable_attribute
=
static_cast
<
AlgoBase
*>
(
i
)
->
usable_attribute
(
param
,
AlgoSelectionStrategy
::
HEURISTIC
,
attr
);
if
(
usable_attribute
&&
static_cast
<
AlgoBase
*>
(
i
)
->
get_workspace
(
param
)
<=
workspace_limit_in_bytes
)
{
//! store the first usable algo if no prefer algo, choose it as
...
...
@@ -499,8 +496,8 @@ ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm(
}
if
(
!
m_prev_selected_algo
||
memcmp
(
&
m_prev_selected_algo_sizep
,
&
param
,
sizeof
(
NCBKernSizeParam
)))
{
m_prev_selected_algo
=
get_algorithm_heuristic_with_ncb
(
param
,
workspace_size
);
m_prev_selected_algo
=
get_algorithm_heuristic_with_ncb
(
param
,
workspace_size
,
AlgoAttribute
::
DEFAULT
);
m_prev_selected_algo_sizep
=
param
;
}
return
m_prev_selected_algo
;
...
...
dnn/src/fallback/conv_bias/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -95,9 +95,7 @@ public:
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
//! size param for kernels with non-contiguous batch
struct
NCBKernSizeParam
:
ConvolutionImpl
::
NCBKernSizeParam
{
...
...
@@ -321,11 +319,11 @@ public:
return
false
;
}
bool
usable_reproducible
(
const
NCBKernSizeParam
&
param
,
bool
usable_attribute
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
,
bool
reproducible
=
true
)
const
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
)
const
{
return
contain_attribute
(
attr
)
&&
usable
(
param
,
algo_selection_strategy
);
}
...
...
@@ -363,7 +361,7 @@ protected:
virtual
Algorithm
*
get_algorithm_heuristic_with_ncb
(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
=
false
);
const
AlgoAttribute
&
attr
);
const
char
*
get_algorithm_set_name
()
const
override
;
...
...
dnn/src/fallback/convolution/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -198,13 +198,13 @@ std::vector<ConvolutionImpl::Algorithm*> ConvolutionImpl::get_all_algorithms(
ConvolutionImpl
::
Algorithm
*
ConvolutionImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fparam
=
make_ncb_kern_size_param
(
src
,
filter
,
dst
,
nullptr
);
auto
result
=
get_algorithm_heuristic_with_ncb
(
fparam
,
workspace_limit_in_bytes
,
reproducible
);
fparam
,
workspace_limit_in_bytes
,
attr
);
if
(
result
==
nullptr
)
{
result
=
naive
::
ConvolutionForwardImpl
::
get_algorithm_heuristic
(
src
,
filter
,
dst
,
workspace_limit_in_bytes
,
reproducible
);
src
,
filter
,
dst
,
workspace_limit_in_bytes
,
attr
);
}
return
result
;
}
...
...
@@ -312,18 +312,16 @@ void ConvolutionImpl::exec_with_ncb_kern(const NCBKernParam& param,
ConvolutionImpl
::
Algorithm
*
ConvolutionImpl
::
get_algorithm_heuristic_with_ncb
(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo_data_type
=
param
.
deduce_algo_data_type
();
auto
suggest_category_order
=
suggest_algo_category_order
(
param
);
for
(
auto
category
:
suggest_category_order
)
{
auto
&&
origin_algos
=
select_algo_type
({
algo_data_type
,
category
});
ConvolutionImpl
::
Algorithm
*
heuristic_algo
=
nullptr
;
for
(
auto
i
:
origin_algos
)
{
bool
usable_reproducible
=
static_cast
<
AlgoBase
*>
(
i
)
->
usable_reproducible
(
param
,
AlgoSelectionStrategy
::
HEURISTIC
,
reproducible
);
if
(
usable_reproducible
&&
bool
usable_attribute
=
static_cast
<
AlgoBase
*>
(
i
)
->
usable_attribute
(
param
,
AlgoSelectionStrategy
::
HEURISTIC
,
attr
);
if
(
usable_attribute
&&
static_cast
<
AlgoBase
*>
(
i
)
->
get_workspace
(
param
)
<=
workspace_limit_in_bytes
)
{
//! store the first usable algo if no prefer algo, choose it as
...
...
@@ -392,8 +390,8 @@ ConvolutionImpl::Algorithm* ConvolutionImpl::get_algorithm(
}
if
(
!
m_prev_selected_algo
||
memcmp
(
&
m_prev_selected_algo_sizep
,
&
param
,
sizeof
(
NCBKernSizeParam
)))
{
m_prev_selected_algo
=
get_algorithm_heuristic_with_ncb
(
param
,
workspace_size
);
m_prev_selected_algo
=
get_algorithm_heuristic_with_ncb
(
param
,
workspace_size
,
AlgoAttribute
::
DEFAULT
);
m_prev_selected_algo_sizep
=
param
;
}
return
m_prev_selected_algo
;
...
...
@@ -515,15 +513,15 @@ ConvolutionBackwardDataImpl::Algorithm*
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
if
(
param
().
format
==
param
::
Convolution
::
Format
::
NHWCD4
||
param
().
format
==
param
::
Convolution
::
Format
::
NCHW4
)
{
return
naive
::
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
filter
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
);
filter
,
diff
,
grad
,
workspace_limit_in_bytes
,
attr
);
}
auto
fparam
=
make_ncb_kern_size_param
(
filter
,
diff
,
grad
);
return
get_algorithm_heuristic_with_ncb
(
fparam
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
ConvolutionBackwardDataImpl
::
NCBKernSizeParam
...
...
@@ -668,15 +666,15 @@ ConvolutionBackwardDataImpl::get_all_algorithms_with_ncb(
ConvolutionBackwardDataImpl
::
Algorithm
*
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic_with_ncb
(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
if
(
param
.
filter_meta
.
group
!=
1
)
{
auto
p1g
=
param
;
p1g
.
filter_meta
.
group
=
1
;
return
ncb_1g_get_algorithm_heuristic
(
p1g
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
return
ncb_1g_get_algorithm_heuristic
(
param
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
size_t
ConvolutionBackwardDataImpl
::
ncb_1g_get_workspace
(
...
...
@@ -731,14 +729,10 @@ ConvolutionBackwardDataImpl::ncb_1g_get_all_algorithms(
ConvolutionBackwardDataImpl
::
Algorithm
*
ConvolutionBackwardDataImpl
::
ncb_1g_get_algorithm_heuristic
(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
for
(
auto
i
:
ncb_1g_get_all_algorithms
(
param
))
{
if
(
ncb_1g_get_workspace
(
i
,
param
)
<=
workspace_limit_in_bytes
)
{
if
(
reproducible
)
{
if
(
i
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
{
return
i
;
}
}
else
{
if
(
i
->
contain_attribute
(
attr
))
{
return
i
;
}
}
...
...
@@ -788,7 +782,8 @@ ConvolutionBackwardDataImpl::get_algorithm(const NCBKernSizeParam& param) {
if
(
!
m_prev_selected_algo
||
memcmp
(
&
m_prev_selected_algo_sizep
,
&
param
,
sizeof
(
NCBKernSizeParam
)))
{
m_prev_selected_algo
=
ncb_1g_get_algorithm_heuristic
(
param
,
std
::
numeric_limits
<
size_t
>::
max
());
param
,
std
::
numeric_limits
<
size_t
>::
max
(),
AlgoAttribute
::
DEFAULT
);
m_prev_selected_algo_sizep
=
param
;
}
return
m_prev_selected_algo
;
...
...
dnn/src/fallback/convolution/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -90,7 +90,7 @@ public:
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
//! size param for kernels with non-contiguous batch
struct
NCBKernSizeParam
{
...
...
@@ -238,11 +238,11 @@ public:
return
false
;
}
bool
usable_reproducible
(
const
NCBKernSizeParam
&
param
,
bool
usable_attribute
(
const
NCBKernSizeParam
&
param
,
AlgoSelectionStrategy
algo_selection_strategy
,
bool
reproducible
=
true
)
const
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
)
const
{
return
contain_attribute
(
attr
)
&&
usable
(
param
,
algo_selection_strategy
);
}
...
...
@@ -272,7 +272,7 @@ protected:
virtual
Algorithm
*
get_algorithm_heuristic_with_ncb
(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
=
false
);
const
AlgoAttribute
&
attr
);
const
char
*
get_algorithm_set_name
()
const
override
;
...
...
@@ -326,7 +326,7 @@ public:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
;
//! size param for kernels with non-contiguous batch
...
...
@@ -421,12 +421,10 @@ protected:
virtual
ncb_kern_t
dispatch_kern
(
ConvolutionBackwardDataImpl
*
opr
,
const
NCBKernSizeParam
&
param
)
const
=
0
;
bool
usable_reproducible
(
ConvolutionBackwardDataImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
bool
reproducible
=
true
)
const
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
usable
(
opr
,
param
);
bool
usable_attribute
(
ConvolutionBackwardDataImpl
*
opr
,
const
NCBKernSizeParam
&
param
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
)
const
{
return
contain_attribute
(
attr
)
&&
usable
(
opr
,
param
);
}
virtual
bool
is_preferred
(
const
NCBKernSizeParam
&
)
const
{
return
false
;
...
...
@@ -451,7 +449,7 @@ protected:
//! default impl calls ncb_1g_get_algorithm_heuristic()
virtual
Algorithm
*
get_algorithm_heuristic_with_ncb
(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
=
false
);
const
AlgoAttribute
&
attr
);
//! get kernel pointer for float32 non-contiguous batch 1-group kernel
virtual
ncb_kern_t
ncb_1g_dispatch_kern
(
Algorithm
*
algo
,
...
...
@@ -469,7 +467,7 @@ protected:
*/
virtual
Algorithm
*
ncb_1g_get_algorithm_heuristic
(
const
NCBKernSizeParam
&
param
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
=
false
);
const
AlgoAttribute
&
attr
);
static
bool
is_matrix_mul_preferred
(
const
NCBKernSizeParam
&
param
);
/**
...
...
dnn/src/fallback/matrix_mul/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -131,19 +131,20 @@ MatrixMulImpl::Algorithm* MatrixMulImpl::get_algorithm_from_desc(
MatrixMul
::
Algorithm
*
MatrixMulImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
A
,
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
auto
kern_size_param
=
make_kern_size_param
(
A
,
B
,
C
);
if
(
auto
algo
=
static_cast
<
AlgoBase
*>
(
get_algorithm_from_desc
(
execution_policy
().
algo
)))
{
megdnn_assert
(
algo
->
get_workspace
(
kern_size_param
)
<
workspace_limit_in_bytes
);
auto
cur
=
megdnn
::
get_reproducible_algo
<
MatrixMulImpl
>
(
algo
,
reproducible
);
auto
cur
=
megdnn
::
get_algo_with_attribute
<
MatrixMulImpl
>
(
algo
,
attr
);
if
(
cur
)
return
cur
;
megdnn_throw
(
"require reproducible algorithm, but given algorithm is not "
"reproducible"
);
megdnn_throw
(
ssprintf
(
"require algorithm with attribute%s, but given algorithm with "
"attribute%s"
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
()));
}
AlgoTypePack
algo_type
;
algo_type
.
data_type
=
kern_size_param
.
deduce_algo_data_type
();
...
...
@@ -155,8 +156,8 @@ MatrixMul::Algorithm* MatrixMulImpl::get_algorithm_heuristic(
if
(
static_cast
<
AlgoBase
*>
(
algo
)
->
usable
(
kern_size_param
)
&&
static_cast
<
AlgoBase
*>
(
algo
)
->
get_workspace
(
kern_size_param
)
<=
workspace_limit_in_bytes
)
{
if
(
static_cast
<
AlgoBase
*>
(
algo
)
->
preferred_
reproducibl
e
(
kern_size_param
,
reproducible
))
{
if
(
static_cast
<
AlgoBase
*>
(
algo
)
->
preferred_
attribut
e
(
kern_size_param
,
attr
))
{
//! use gemv algo if it's prefered
if
(
algo
->
algoset
()
==
AlgoBase
::
AlgoSet
::
ALGO_TYPE_GEMV
)
{
return
algo
;
...
...
@@ -214,8 +215,9 @@ MatrixMulImpl::KernParam MatrixMulImpl::make_kern_param(
size_t
MatrixMulImpl
::
get_workspace_in_bytes
(
const
TensorLayout
&
A
,
const
TensorLayout
&
B
,
const
TensorLayout
&
C
)
{
if
(
auto
algo
=
get_algorithm_heuristic
(
A
,
B
,
C
,
std
::
numeric_limits
<
size_t
>::
max
(),
false
))
{
if
(
auto
algo
=
get_algorithm_heuristic
(
A
,
B
,
C
,
std
::
numeric_limits
<
size_t
>::
max
(),
AlgoAttribute
::
DEFAULT
))
{
auto
kern_size_param
=
make_kern_size_param
(
A
,
B
,
C
);
return
static_cast
<
AlgoBase
*>
(
algo
)
->
get_workspace
(
kern_size_param
);
}
...
...
@@ -228,7 +230,7 @@ void MatrixMulImpl::exec(_megdnn_tensor_in A, _megdnn_tensor_in B,
if
(
auto
algo
=
get_algorithm_heuristic
(
A
.
layout
,
B
.
layout
,
C
.
layout
,
std
::
numeric_limits
<
size_t
>::
max
(),
false
))
{
AlgoAttribute
::
DEFAULT
))
{
auto
kern_param
=
make_kern_param
(
A
,
B
,
C
,
workspace
);
auto
kern
=
static_cast
<
AlgoBase
*>
(
algo
)
->
get_kern
(
kern_param
);
auto
run
=
[
kern
,
kern_param
]()
{
kern
(
kern_param
);
};
...
...
dnn/src/fallback/matrix_mul/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -223,11 +223,10 @@ public:
virtual
InnerBlockSize
get_inner_block_size
()
const
{
megdnn_assert
(
0
);
};
bool
preferred_reproducible
(
const
KernSizeParam
&
param
,
bool
reproducible
=
true
)
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
preferred
(
param
);
bool
preferred_attribute
(
const
KernSizeParam
&
param
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
)
{
return
contain_attribute
(
attr
)
&&
preferred
(
param
);
};
virtual
MatmulDescription
matmul_description
()
const
=
0
;
...
...
@@ -272,7 +271,7 @@ protected:
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
};
...
...
dnn/src/naive/batch_conv_bias/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -125,16 +125,14 @@ BatchConvBiasForwardImpl::get_algorithm_heuristic(
const
TensorLayout
&
/* bias */
,
const
TensorLayout
&
/* z */
,
const
TensorLayout
&
/* dst */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_batch_conv_bias_fwd_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
dnn/src/naive/batch_conv_bias/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -37,7 +37,7 @@ public:
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
Algorithm
*
get_algorithm_from_desc
(
const
AlgorithmDesc
&
)
override
;
...
...
dnn/src/naive/batched_matrix_mul/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -76,7 +76,7 @@ BatchedMatrixMulForward::Algorithm*
BatchedMatrixMulForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/*A*/
,
const
TensorLayout
&
/*B*/
,
const
TensorLayout
&
/*C*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/* reproducible
*/
)
{
const
AlgoAttribute
&
/*attr
*/
)
{
return
static_cast
<
HandleImpl
*>
(
handle
())
->
default_batched_matmul_fwd_algo
();
}
...
...
dnn/src/naive/batched_matrix_mul/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -32,7 +32,7 @@ public:
const
TensorLayout
&
/*B*/
,
const
TensorLayout
&
/*C*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/* reproducible
*/
)
override
;
const
AlgoAttribute
&
/*attr
*/
)
override
;
Algorithm
*
get_algorithm_from_desc
(
const
AlgorithmDesc
&
)
override
;
...
...
dnn/src/naive/conv_bias/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -246,16 +246,14 @@ ConvBiasForward::Algorithm* ConvBiasForwardImpl::get_algorithm_heuristic(
const
TensorLayout
&
/* src */
,
const
TensorLayout
&
/* filter */
,
const
TensorLayout
&
/* bias */
,
const
TensorLayout
&
/* z */
,
const
TensorLayout
&
/* dst */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_conv_bias_fwd_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
dnn/src/naive/conv_bias/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -37,7 +37,7 @@ public:
const
TensorLayout
&
z
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
const
TensorLayout
&
filter
,
...
...
dnn/src/naive/convolution/convolution.cpp
浏览文件 @
ec1a99ac
...
...
@@ -272,16 +272,14 @@ ConvolutionForwardImpl:: get_all_algorithms(const TensorLayout &,
ConvolutionForward
::
Algorithm
*
ConvolutionForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/* src */
,
const
TensorLayout
&
/* diff */
,
const
TensorLayout
&
/* grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_conv_fwd_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
@@ -304,16 +302,14 @@ ConvolutionBackwardData::Algorithm*
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/* filter */
,
const
TensorLayout
&
/* diff */
,
const
TensorLayout
&
/* grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_conv_bwd_data_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
@@ -337,16 +333,14 @@ ConvolutionBackwardFilter::Algorithm*
ConvolutionBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/* src */
,
const
TensorLayout
&
/* diff */
,
const
TensorLayout
&
/* grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_conv_bwd_filter_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
dnn/src/naive/convolution/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -29,7 +29,7 @@ class ConvolutionForwardImpl: public ConvolutionForward {
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
PreprocessedFilter
*
)
override
{
...
...
@@ -71,7 +71,7 @@ class ConvolutionBackwardDataImpl: public ConvolutionBackwardData {
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
;
...
...
@@ -94,7 +94,7 @@ class ConvolutionBackwardFilterImpl: public ConvolutionBackwardFilter {
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
;
...
...
dnn/src/naive/convolution3d/convolution3d.cpp
浏览文件 @
ec1a99ac
...
...
@@ -120,15 +120,13 @@ Convolution3DForward::Algorithm*
Convolution3DForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/* src */
,
const
TensorLayout
&
/* filter */
,
const
TensorLayout
&
/* dst */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_conv3d_fwd_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
@@ -152,16 +150,14 @@ Convolution3DBackwardData::Algorithm*
Convolution3DBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/* filter */
,
const
TensorLayout
&
/* diff */
,
const
TensorLayout
&
/* grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_conv3d_bwd_data_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
@@ -187,16 +183,14 @@ Convolution3DBackwardFilterImpl::get_algorithm_heuristic(
const
TensorLayout
&
/* src */
,
const
TensorLayout
&
/* diff */
,
const
TensorLayout
&
/* grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_conv3d_bwd_filter_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
dnn/src/naive/convolution3d/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -26,7 +26,7 @@ public:
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
0
;
...
...
@@ -48,7 +48,7 @@ public:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
0
;
...
...
@@ -70,7 +70,7 @@ public:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
0
;
...
...
dnn/src/naive/deformable_conv/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -32,7 +32,7 @@ public:
const
TensorLayout
&
/* mask */
,
const
TensorLayout
&
/* dst */
,
size_t
/* workspace_limit_in_bytes */
,
bool
/* reproducible
*/
)
override
{
const
AlgoAttribute
&
/*attr
*/
)
override
{
return
nullptr
;
};
...
...
@@ -74,7 +74,7 @@ public:
const
TensorLayout
&
/* out_grad */
,
const
TensorLayout
&
/* filter_grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
/* reproducible
*/
)
override
{
const
AlgoAttribute
&
/*attr
*/
)
override
{
return
nullptr
;
};
...
...
@@ -121,7 +121,7 @@ public:
const
TensorLayout
&
/* offset_grad */
,
const
TensorLayout
&
/* mask_grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
/* reproducible
*/
)
override
{
const
AlgoAttribute
&
/*attr
*/
)
override
{
return
nullptr
;
};
...
...
dnn/src/naive/local_share/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -162,16 +162,14 @@ LocalShareForwardImpl::get_all_algorithms(const TensorLayout&,
LocalShareForward
::
Algorithm
*
LocalShareForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/* src */
,
const
TensorLayout
&
/* diff */
,
const
TensorLayout
&
/* grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_local_share_fwd_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
@@ -196,16 +194,14 @@ LocalShareBackwardData::Algorithm*
LocalShareBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/* filter */
,
const
TensorLayout
&
/* diff */
,
const
TensorLayout
&
/* grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_local_share_bwd_data_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
@@ -230,16 +226,14 @@ LocalShareBackwardFilter::Algorithm*
LocalShareBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/* src */
,
const
TensorLayout
&
/* diff */
,
const
TensorLayout
&
/* grad */
,
size_t
/* workspace_limit_in_bytes */
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
algo
=
static_cast
<
HandleImpl
*>
(
handle
())
->
default_local_share_bwd_filter_algo
();
if
(
reproducible
)
{
megdnn_assert
(
algo
->
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
),
"require reproducible algorithm, but heuristic "
"algorithm(%s) is not "
"reproducible"
,
algo
->
name
());
}
megdnn_assert
(
algo
->
contain_attribute
(
attr
),
"require algorithm with attribute%s, but heuristic "
"algorithm(%s) with attribute%s "
,
Algorithm
::
attribute_str
(
attr
).
c_str
(),
algo
->
name
(),
Algorithm
::
attribute_str
(
algo
->
attribute
()).
c_str
());
return
algo
;
}
...
...
dnn/src/naive/local_share/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -34,7 +34,7 @@ public:
const
TensorLayout
&
/*filter*/
,
const
TensorLayout
&
/*dst*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/*reproducible
*/
)
override
;
const
AlgoAttribute
&
/*attr
*/
)
override
;
Algorithm
*
get_algorithm_from_desc
(
const
AlgorithmDesc
&
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
{
return
"DEFAULT"
;
}
...
...
@@ -59,7 +59,7 @@ public:
const
TensorLayout
&
/*diff*/
,
const
TensorLayout
&
/*grad*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/*reproducible
*/
)
override
;
const
AlgoAttribute
&
/*attr
*/
)
override
;
Algorithm
*
get_algorithm_from_desc
(
const
AlgorithmDesc
&
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
{
return
"DEFAULT"
;
}
...
...
@@ -84,7 +84,7 @@ public:
const
TensorLayout
&
/*diff*/
,
const
TensorLayout
&
/*grad*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/*reproducible
*/
)
override
;
const
AlgoAttribute
&
/*attr
*/
)
override
;
Algorithm
*
get_algorithm_from_desc
(
const
AlgorithmDesc
&
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
{
return
"DEFAULT"
;
}
...
...
dnn/src/naive/matrix_mul/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -91,7 +91,7 @@ MatrixMulForwardImpl::get_all_algorithms(const TensorLayout& /*A*/,
MatrixMulForward
::
Algorithm
*
MatrixMulForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
/*A*/
,
const
TensorLayout
&
/*B*/
,
const
TensorLayout
&
/*C*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/* reproducible
*/
)
{
const
AlgoAttribute
&
/*attr
*/
)
{
return
static_cast
<
HandleImpl
*>
(
handle
())
->
default_matmul_fwd_algo
();
}
...
...
dnn/src/naive/matrix_mul/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -33,7 +33,7 @@ public:
const
TensorLayout
&
/*B*/
,
const
TensorLayout
&
/*C*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/* reproducible
*/
)
override
;
const
AlgoAttribute
&
/*attr
*/
)
override
;
Algorithm
*
get_algorithm_from_desc
(
const
AlgorithmDesc
&
)
override
;
...
...
dnn/src/rocm/batched_matrix_mul/algos.h
浏览文件 @
ec1a99ac
...
...
@@ -70,12 +70,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
const
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
const
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/rocm/batched_matrix_mul/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -32,16 +32,16 @@ BatchedMatrixMulForwardImpl::get_all_algorithms(const TensorLayout& A,
BatchedMatrixMulForwardImpl
::
Algorithm
*
BatchedMatrixMulForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
A
,
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
{
this
,
A
,
B
,
C
};
if
(
sm_algo_pack
.
blas
.
is_available_
reproducible
(
args
,
reproducible
,
if
(
sm_algo_pack
.
blas
.
is_available_
attribute
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
blas
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
BatchedMatrixMulForwardImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
BatchedMatrixMulForwardImpl
>
(
sm_algo_pack
.
all_algos
,
args
,
workspace_limit_in_bytes
,
"batched matrix mul forward"
);
"batched matrix mul forward"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
BatchedMatrixMulForwardImpl
>
(
sm_algo_pack
.
all_algos
,
args
,
workspace_limit_in_bytes
,
...
...
dnn/src/rocm/batched_matrix_mul/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -40,7 +40,7 @@ private:
const
TensorLayout
&
/*B*/
,
const
TensorLayout
&
/*C*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/*reproducible
*/
)
override
;
const
AlgoAttribute
&
/*attr
*/
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
{
return
"ROCM BATCHED MATMUL"
;
...
...
dnn/src/rocm/convolution/backward_data/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -74,12 +74,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
...
...
@@ -96,24 +95,20 @@ public:
};
class
ConvolutionBackwardDataImpl
::
AlgoMIOpen
final
:
public
AlgoBase
{
bool
m_is_reproducibl
e
;
AlgoAttribute
m_algo_attribut
e
;
const
char
*
m_name
;
miopenConvBwdDataAlgorithm_t
find_best_algo
(
const
ExecArgs
&
args
);
public:
AlgoMIOpen
()
=
delete
;
AlgoMIOpen
(
bool
is_reproducible
)
:
m_is_reproducible
(
is_reproducible
)
{}
AlgoMIOpen
(
AlgoAttribute
attr
)
:
m_algo_attribute
(
attr
)
{}
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
static_cast
<
AlgoAttribute
>
(
0
);
if
(
m_is_reproducible
)
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
return
m_algo_attribute
;
}
const
char
*
name
()
const
override
{
...
...
@@ -124,7 +119,7 @@ public:
MEGDNN_DECL_ALGO_TYPE
(
ROCM_MIOPEN
)
std
::
string
param
()
const
override
{
std
::
string
ret
;
serialize_write_pod
(
m_
is_reproducibl
e
,
ret
);
serialize_write_pod
(
m_
algo_attribut
e
,
ret
);
return
ret
;
}
...
...
@@ -170,7 +165,7 @@ class ConvolutionBackwardDataImpl::AlgoPack : NonCopyableObj {
public:
AlgoPack
();
AlgoMIOpen
miopen
{
true
};
AlgoMIOpen
miopen
{
AlgoAttribute
::
REPRODUCIBLE
};
AlgoMatmul
matmul
;
AlgoChanwise
chanwise
;
...
...
dnn/src/rocm/convolution/backward_filter/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -71,12 +71,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
...
...
@@ -93,25 +92,21 @@ public:
};
class
ConvolutionBackwardFilterImpl
::
AlgoMIOpen
final
:
public
AlgoBase
{
bool
m_is_reproducibl
e
;
AlgoAttribute
m_algo_attribut
e
;
const
char
*
m_name
;
miopenConvBwdWeightsAlgorithm_t
find_best_algo
(
const
ExecArgs
&
args
);
public:
AlgoMIOpen
()
=
delete
;
AlgoMIOpen
(
bool
is_reproducible
)
:
m_is_reproducible
(
is_reproducible
)
{}
AlgoMIOpen
(
AlgoAttribute
attr
)
:
m_algo_attribute
(
attr
)
{}
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
static_cast
<
AlgoAttribute
>
(
0
);
if
(
m_is_reproducible
)
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
return
m_algo_attribute
;
}
const
char
*
name
()
const
override
{
return
"MIOpenConvolutionBackwardFilter"
;
...
...
@@ -121,7 +116,7 @@ public:
MEGDNN_DECL_ALGO_TYPE
(
ROCM_MIOPEN
)
std
::
string
param
()
const
override
{
std
::
string
ret
;
serialize_write_pod
(
m_
is_reproducibl
e
,
ret
);
serialize_write_pod
(
m_
algo_attribut
e
,
ret
);
return
ret
;
}
...
...
@@ -166,7 +161,7 @@ class ConvolutionBackwardFilterImpl::AlgoPack : NonCopyableObj {
public:
AlgoPack
();
AlgoMIOpen
miopen
{
true
};
AlgoMIOpen
miopen
{
AlgoAttribute
::
REPRODUCIBLE
};
AlgoMatmul
matmul
;
AlgoChanwise
chanwise
;
...
...
dnn/src/rocm/convolution/forward/algo.h
浏览文件 @
ec1a99ac
...
...
@@ -73,12 +73,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
...
...
@@ -94,25 +93,21 @@ public:
};
class
ConvolutionForwardImpl
::
AlgoMIOpen
final
:
public
AlgoBase
{
bool
m_is_reproducibl
e
;
AlgoAttribute
m_algo_attribut
e
;
const
char
*
m_name
;
miopenConvFwdAlgorithm_t
find_best_algo
(
const
ExecArgs
&
args
);
public:
AlgoMIOpen
()
=
delete
;
AlgoMIOpen
(
bool
is_reproducible
)
:
m_is_reproducible
(
is_reproducible
)
{}
AlgoMIOpen
(
AlgoAttribute
attr
)
:
m_algo_attribute
(
attr
)
{}
bool
is_available
(
const
SizeArgs
&
args
)
const
override
;
size_t
get_workspace_in_bytes
(
const
SizeArgs
&
args
)
const
override
;
void
exec
(
const
ExecArgs
&
args
)
const
override
;
AlgoAttribute
attribute
()
const
override
{
auto
ret
=
static_cast
<
AlgoAttribute
>
(
0
);
if
(
m_is_reproducible
)
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
return
m_algo_attribute
;
}
const
char
*
name
()
const
override
{
return
"MIOpenConvolutionForward"
;
}
...
...
@@ -121,7 +116,7 @@ public:
MEGDNN_DECL_ALGO_TYPE
(
ROCM_MIOPEN
)
std
::
string
param
()
const
override
{
std
::
string
ret
;
serialize_write_pod
(
m_
is_reproducibl
e
,
ret
);
serialize_write_pod
(
m_
algo_attribut
e
,
ret
);
return
ret
;
}
...
...
@@ -215,7 +210,7 @@ class ConvolutionForwardImpl::AlgoPack : NonCopyableObj {
public:
AlgoPack
();
AlgoMIOpen
miopen
{
true
};
AlgoMIOpen
miopen
{
AlgoAttribute
::
REPRODUCIBLE
};
AlgoMatmul
matmul
;
AlgoInplaceMatmul
inplace_matmul
;
Algo1x1
a1x1
;
...
...
dnn/src/rocm/convolution/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -33,70 +33,69 @@ ConvolutionForwardImpl::get_algorithm_heuristic(const TensorLayout& src,
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
check_layout_fwd
(
src
,
filter
,
dst
);
return
get_algorithm_heuristic
(
src
,
fm
,
dst
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
ConvolutionForwardImpl
::
Algorithm
*
ConvolutionForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
filter
,
dst
);
//! MIOpen auto-tuning need to run with actual tensors, so we cannot get
//! best algorithm here.
if
(
is_miopen_supported
(
args
))
{
auto
algo
=
megdnn
::
get_
reproducible_algo
<
ConvolutionForwardImpl
>
(
sm_algo_pack
.
miopen_algos
[
0
],
reproducible
);
auto
algo
=
megdnn
::
get_
algo_with_attribute
<
ConvolutionForwardImpl
>
(
sm_algo_pack
.
miopen_algos
[
0
],
attr
);
if
(
algo
)
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
)
{
if
(
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
chanwise
;
}
}
auto
prefer_1x1
=
[
&
args
,
reproducible
,
workspace_limit_in_bytes
]()
{
auto
prefer_1x1
=
[
&
args
,
attr
,
workspace_limit_in_bytes
]()
{
const
size_t
MAX_BATCH_SIZE_FOR_1x1_MAT_ALGO
=
4
;
size_t
batch_size
=
args
.
src_layout
->
shape
[
0
];
if
(
batch_size
>
MAX_BATCH_SIZE_FOR_1x1_MAT_ALGO
)
{
return
false
;
}
return
sm_algo_pack
.
a1x1
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
);
return
sm_algo_pack
.
a1x1
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
);
};
if
(
prefer_1x1
())
{
return
&
sm_algo_pack
.
a1x1
;
}
auto
prefer_1x1_large_batch
=
[
&
args
,
reproducible
,
workspace_limit_in_bytes
]()
{
auto
prefer_1x1_large_batch
=
[
&
args
,
attr
,
workspace_limit_in_bytes
]()
{
const
size_t
MIN_BATCH_SIZE_FOR_1x1_LARGE_BATCH_ALGO
=
32
;
size_t
batch_size
=
args
.
src_layout
->
shape
[
0
];
if
(
batch_size
<
MIN_BATCH_SIZE_FOR_1x1_LARGE_BATCH_ALGO
)
{
return
false
;
}
return
sm_algo_pack
.
batched_matrix_mul
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
);
return
sm_algo_pack
.
batched_matrix_mul
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
);
};
if
(
prefer_1x1_large_batch
())
{
return
&
sm_algo_pack
.
batched_matrix_mul
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
ConvolutionForwardImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
ConvolutionForwardImpl
>
(
sm_algo_pack
.
non_miopen_algos
,
args
,
workspace_limit_in_bytes
,
"rocm conv fwd"
);
"rocm conv fwd"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvolutionForwardImpl
>
(
sm_algo_pack
.
non_miopen_algos
,
args
,
workspace_limit_in_bytes
,
...
...
@@ -157,36 +156,36 @@ ConvolutionBackwardDataImpl::Algorithm*
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
check_layout_fwd
(
grad
,
filter
,
diff
);
return
get_algorithm_heuristic
(
fm
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
ConvolutionBackwardDataImpl
::
Algorithm
*
ConvolutionBackwardDataImpl
::
get_algorithm_heuristic
(
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
filter
,
diff
,
grad
);
if
(
is_miopen_supported
(
args
.
as_fwd_args
()))
{
auto
algo
=
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
miopen_algos
[
0
],
reproducible
);
auto
algo
=
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
miopen_algos
[
0
],
attr
);
if
(
algo
)
return
algo
;
}
if
(
args
.
filter_meta
.
group
>
1
&&
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
chanwise
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardDataImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
non_miopen_algos
,
args
,
workspace_limit_in_bytes
,
"rocm conv bwd_data"
);
"rocm conv bwd_data"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvolutionBackwardDataImpl
>
(
sm_algo_pack
.
non_miopen_algos
,
args
,
workspace_limit_in_bytes
,
...
...
@@ -230,38 +229,38 @@ ConvolutionBackwardFilterImpl::Algorithm*
ConvolutionBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
auto
fm
=
check_layout_fwd
(
src
,
grad
,
diff
);
return
get_algorithm_heuristic
(
src
,
diff
,
fm
,
workspace_limit_in_bytes
,
reproducible
);
attr
);
}
ConvolutionBackwardFilterImpl
::
Algorithm
*
ConvolutionBackwardFilterImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
(
this
,
src
,
diff
,
grad
);
if
(
is_miopen_supported
(
args
.
as_fwd_args
()))
{
auto
algo
=
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardFilterImpl
>
(
sm_algo_pack
.
miopen_algos
[
0
],
reproducible
);
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardFilterImpl
>
(
sm_algo_pack
.
miopen_algos
[
0
],
attr
);
if
(
algo
)
return
algo
;
}
if
(
args
.
grad_filter_meta
.
group
>
1
&&
sm_algo_pack
.
chanwise
.
is_available_
reproducibl
e
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
sm_algo_pack
.
chanwise
.
is_available_
attribut
e
(
args
,
attr
,
workspace_limit_in_bytes
))
{
// prefer special chanwise impl
return
&
sm_algo_pack
.
chanwise
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
ConvolutionBackwardFilterImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
ConvolutionBackwardFilterImpl
>
(
sm_algo_pack
.
non_miopen_algos
,
args
,
workspace_limit_in_bytes
,
"rocm conv bwd_filter"
);
"rocm conv bwd_filter"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
ConvolutionBackwardFilterImpl
>
(
sm_algo_pack
.
non_miopen_algos
,
args
,
workspace_limit_in_bytes
,
...
...
dnn/src/rocm/convolution/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -26,9 +26,9 @@ public:
AlgorithmInfo
get_algorithm_info_heuristic
(
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
src
,
filter
,
dst
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
...
...
@@ -76,12 +76,12 @@ private:
const
TensorLayout
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
dst
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
@@ -94,9 +94,9 @@ public:
AlgorithmInfo
get_algorithm_info_heuristic
(
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
filter
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
filter
,
...
...
@@ -122,12 +122,12 @@ private:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
Algorithm
*
get_algorithm_heuristic
(
const
CanonizedFilterMeta
&
filter
,
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
@@ -141,9 +141,9 @@ public:
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
const
AlgoAttribute
&
attr
)
{
return
get_algorithm_heuristic
(
src
,
diff
,
grad
,
workspace_limit_in_bytes
,
reproducible
)
workspace_limit_in_bytes
,
attr
)
->
info
();
}
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
src
,
...
...
@@ -169,12 +169,12 @@ private:
const
TensorLayout
&
diff
,
const
TensorLayout
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
override
;
const
AlgoAttribute
&
attr
)
override
;
Algorithm
*
get_algorithm_heuristic
(
const
TensorLayout
&
src
,
const
TensorLayout
&
diff
,
const
CanonizedFilterMeta
&
grad
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
);
const
AlgoAttribute
&
attr
);
static
AlgoPack
sm_algo_pack
;
};
...
...
dnn/src/rocm/matrix_mul/algos.h
浏览文件 @
ec1a99ac
...
...
@@ -70,12 +70,11 @@ public:
bool
is_available_wk
(
const
SizeArgs
&
args
,
size_t
limit
)
const
{
return
is_available
(
args
)
&&
get_workspace_in_bytes
(
args
)
<=
limit
;
}
bool
is_available_reproducible
(
const
SizeArgs
&
args
,
bool
reproducible
=
true
,
bool
is_available_attribute
(
const
SizeArgs
&
args
,
const
AlgoAttribute
&
attr
=
AlgoAttribute
::
REPRODUCIBLE
,
size_t
limit
=
std
::
numeric_limits
<
size_t
>::
max
())
const
{
return
(
!
reproducible
||
contain_attribute
(
AlgoAttribute
::
REPRODUCIBLE
))
&&
is_available_wk
(
args
,
limit
);
return
contain_attribute
(
attr
)
&&
is_available_wk
(
args
,
limit
);
}
AlgoBase
&
check_workspace
(
const
SizeArgs
&
args
,
const
Workspace
&
workspace
)
{
...
...
dnn/src/rocm/matrix_mul/opr_impl.cpp
浏览文件 @
ec1a99ac
...
...
@@ -29,16 +29,16 @@ MatrixMulForwardImpl::get_all_algorithms(const TensorLayout& A,
MatrixMulForwardImpl
::
Algorithm
*
MatrixMulForwardImpl
::
get_algorithm_heuristic
(
const
TensorLayout
&
A
,
const
TensorLayout
&
B
,
const
TensorLayout
&
C
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
)
{
size_t
workspace_limit_in_bytes
,
const
AlgoAttribute
&
attr
)
{
AlgoBase
::
SizeArgs
args
{
this
,
A
,
B
,
C
};
if
(
sm_algo_pack
.
blas
.
is_available_
reproducible
(
args
,
reproducible
,
workspace_limit_in_bytes
))
{
if
(
sm_algo_pack
.
blas
.
is_available_
attribute
(
args
,
attr
,
workspace_limit_in_bytes
))
{
return
&
sm_algo_pack
.
blas
;
}
if
(
reproducible
)
{
return
megdnn
::
get_
reproducible_algo
<
MatrixMulForwardImpl
>
(
if
(
attr
!=
AlgoAttribute
::
DEFAULT
)
{
return
megdnn
::
get_
algo_with_attribute
<
MatrixMulForwardImpl
>
(
sm_algo_pack
.
all_algos
,
args
,
workspace_limit_in_bytes
,
"matrix mul forward"
);
"matrix mul forward"
,
attr
);
}
else
{
return
megdnn
::
get_usable_algo
<
MatrixMulForwardImpl
>
(
sm_algo_pack
.
all_algos
,
args
,
workspace_limit_in_bytes
,
...
...
dnn/src/rocm/matrix_mul/opr_impl.h
浏览文件 @
ec1a99ac
...
...
@@ -40,7 +40,7 @@ private:
const
TensorLayout
&
/*B*/
,
const
TensorLayout
&
/*C*/
,
size_t
/*workspace_limit_in_bytes*/
,
bool
/*reproducible
*/
)
override
;
const
AlgoAttribute
&
/*attr
*/
)
override
;
const
char
*
get_algorithm_set_name
()
const
override
{
return
"ROCM MATMUL"
;
...
...
src/opr/impl/search_policy/algo_chooser.cpp
浏览文件 @
ec1a99ac
...
...
@@ -278,6 +278,15 @@ std::vector<megdnn::Algorithm::SearchItem> flatten_search_space(
return
ret
;
}
AlgoAttribute
extract_algo_attribute_from_execution_strategy
(
const
ExecutionStrategy
&
strategy
)
{
AlgoAttribute
ret
=
AlgoAttribute
::
DEFAULT
;
if
(
strategy
&
ExecutionStrategy
::
REPRODUCIBLE
)
{
ret
|=
AlgoAttribute
::
REPRODUCIBLE
;
}
return
ret
;
}
//! Test whether the algo attribute of a algo match the require
//! algo_strategy
static
bool
algo_attribute_match_strategy
(
AlgoAttribute
attribute
,
...
...
@@ -290,7 +299,6 @@ static bool algo_attribute_match_strategy(AlgoAttribute attribute,
}
return
ret
;
}
}
// namespace
namespace
mgb
{
...
...
@@ -303,9 +311,9 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx,
return
;
AlgoChooserProfileCache
::
Result
prof_rst
;
std
::
string
str_on_inp_shape
=
ssprintf
(
"on input layouts (%s, %s)"
,
ctx
.
layouts
()[
0
].
to_string
().
c_str
(),
ctx
.
layouts
()[
1
].
to_string
().
c_str
()
);
auto
target_attribute
=
extract_algo_attribute_from_execution_strategy
(
selected_strategy
);
std
::
string
layouts_str
=
format_fixlayouts
<
Opr
>
(
ctx
.
layouts
(),
arity_in
,
arity_out
);
double
cur_timeout
=
0
;
auto
workspace_limit
=
WorkspaceLimitGetter
::
get_workspace_limit
(
...
...
@@ -316,20 +324,22 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx,
Maybe
<
AlgoChooserProfileCache
::
ResultEntry
>
cur_rst
;
std
::
string
msg
=
ssprintf
(
"profiling %s algorithm %s %s"
,
ctx
.
mgb_opr
()
->
dyn_typeinfo
()
->
name
,
algo
.
name
.
c_str
(),
str_on_inp_shape
.
c_str
());
algo
.
name
.
c_str
(),
layouts_str
.
c_str
());
ImplExecutionPolicy
policy
;
policy
.
algo
=
algo
.
desc
;
ctx
.
construct_execution_policy
(
selected_strategy
,
policy
);
if
(
ctx
.
get_workspace_size_bytes
(
policy
)
>=
workspace_limit
)
{
continue
;
}
auto
algo_attribute
=
ctx
.
megdnn_opr
()
->
get_algorithm_from_desc
(
policy
.
algo
)
->
attribute
();
if
(
!
algo_attribute_match_strategy
(
algo_attribute
,
selected_strategy
))
{
auto
palgo
=
ctx
.
megdnn_opr
()
->
get_algorithm_from_desc
(
policy
.
algo
);
if
(
!
algo_attribute_match_strategy
(
palgo
->
attribute
(),
selected_strategy
))
{
mgb_log_debug
(
"skip algo %s, which is not match the profile strategy."
,
algo
.
name
.
c_str
());
"skip algo %s with attribute%s, which is not match the "
"profile strategy required attribute%s."
,
algo
.
name
.
c_str
(),
Algorithm
::
attribute_str
(
palgo
->
attribute
()).
c_str
(),
Algorithm
::
attribute_str
(
target_attribute
).
c_str
());
continue
;
}
...
...
@@ -360,9 +370,10 @@ void AlgoChooser<Opr>::profile(ExeContext& ctx,
rst
.
workspace
,
rst
.
time
);
prof_rst
.
push_back
(
rst
);
}
std
::
string
msg
=
ssprintf
(
"no usable %s algorithm %s"
,
ctx
.
mgb_opr
()
->
dyn_typeinfo
()
->
name
,
str_on_inp_shape
.
c_str
());
std
::
string
msg
=
ssprintf
(
"no usable %s algorithm %s with attribute(%s)"
,
ctx
.
mgb_opr
()
->
dyn_typeinfo
()
->
name
,
layouts_str
.
c_str
(),
Algorithm
::
attribute_str
(
target_attribute
).
c_str
());
mgb_assert
(
!
prof_rst
.
empty
(),
"%s"
,
msg
.
c_str
());
FixedTensorLayouts
origin_layouts
=
ctx
.
layouts
();
...
...
@@ -589,14 +600,15 @@ AlgoChooser<Opr>::ExeContext::choose_by_heuristic(
"workspace_limit should not be setted if choose algo by "
"heuristic"
);
}
bool
reproducible
=
static_cast
<
bool
>
(
selected_strategy
&
ExecutionStrategy
::
REPRODUCIBLE
);
auto
workspace_limit
=
WorkspaceLimitGetter
::
get_workspace_limit
(
owner_graph
(),
m_cn
,
m_execution_policy
.
workspace_limit
);
ImplExecutionPolicy
policy
;
policy
.
algo
=
APPLY
(
m_megdnn_opr
->
get_algorithm_info_heuristic
(
args
...,
workspace_limit
,
reproducible
),
m_layouts
).
desc
;
args
...,
workspace_limit
,
extract_algo_attribute_from_execution_strategy
(
selected_strategy
)),
m_layouts
)
.
desc
;
Algorithm
*
algo
=
m_megdnn_opr
->
get_algorithm_from_desc
(
policy
.
algo
);
mgb_assert
(
algo
,
"Unknown algo description"
);
...
...
@@ -647,8 +659,6 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
ExecutionStrategy
selected_strategy
,
typename
AlgoChooser
<
Opr
>::
ImplExecutionPolicy
&
policy
,
bool
retrive_from_cache
)
const
{
bool
reproducible
=
static_cast
<
bool
>
(
selected_strategy
&
ExecutionStrategy
::
REPRODUCIBLE
);
if
(
!
policy
.
algo
.
valid
())
{
if
(
retrive_from_cache
)
{
policy
.
algo
=
...
...
@@ -656,9 +666,11 @@ void AlgoChooser<Opr>::ExeContext::construct_execution_policy(
}
else
{
auto
workspace_limit
=
WorkspaceLimitGetter
::
get_workspace_limit
(
owner_graph
(),
m_cn
,
m_execution_policy
.
workspace_limit
);
policy
.
algo
=
APPLY
(
m_megdnn_opr
->
get_algorithm_info_heuristic
(
policy
.
algo
=
APPLY
(
m_megdnn_opr
->
get_algorithm_info_heuristic
(
args
...,
workspace_limit
,
reproducible
),
extract_algo_attribute_from_execution_strategy
(
selected_strategy
)),
m_layouts
)
.
desc
;
}
...
...
src/opr/test/dnn/convolution.cpp
浏览文件 @
ec1a99ac
...
...
@@ -2375,7 +2375,7 @@ public:
AlgorithmInfo
(
const
TensorLayout
&
p0
,
const
TensorLayout
&
p1
,
const
TensorLayout
&
p2
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
));
const
AlgoAttribute
&
attr
));
MOCK_METHOD3
(
get_all_algorithms
,
std
::
vector
<
Algorithm
*>
(
const
TensorLayout
&
p0
,
...
...
@@ -2385,7 +2385,7 @@ public:
Algorithm
*
(
const
TensorLayout
&
p0
,
const
TensorLayout
&
p1
,
const
TensorLayout
&
p2
,
size_t
workspace_limit_in_bytes
,
bool
reproducible
));
const
AlgoAttribute
&
attr
));
MOCK_METHOD1
(
get_algorithm_from_desc
,
Algorithm
*
(
const
AlgorithmDesc
&
));
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录