Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
54b5db17
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
410
Star
4707
Fork
583
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
54b5db17
编写于
8月 12, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(x86/rvv): add AGENT_NCHW_NCHW44 algo
GitOrigin-RevId: 8cf6c3fac004c533c616a0266ea21376edb60f4d
上级
eaa18018
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
323 addition
and
18 deletion
+323
-18
dnn/src/fallback/conv_bias/gi/fp32/algos.h
dnn/src/fallback/conv_bias/gi/fp32/algos.h
+21
-0
dnn/src/fallback/conv_bias/gi/fp32/f32_direct_nchw_nchw44_agent_algo.cpp
...k/conv_bias/gi/fp32/f32_direct_nchw_nchw44_agent_algo.cpp
+256
-0
dnn/src/fallback/conv_bias/gi/fp32/f32_direct_nchw_nchw44_algo.cpp
...allback/conv_bias/gi/fp32/f32_direct_nchw_nchw44_algo.cpp
+0
-13
dnn/src/fallback/conv_bias/opr_impl.cpp
dnn/src/fallback/conv_bias/opr_impl.cpp
+16
-4
dnn/src/fallback/conv_bias/opr_impl.h
dnn/src/fallback/conv_bias/opr_impl.h
+2
-0
dnn/src/fallback/convolution/opr_impl.cpp
dnn/src/fallback/convolution/opr_impl.cpp
+2
-1
dnn/src/fallback/convolution/opr_impl.h
dnn/src/fallback/convolution/opr_impl.h
+1
-0
dnn/test/fallback/conv_bias.cpp
dnn/test/fallback/conv_bias.cpp
+25
-0
未找到文件。
dnn/src/fallback/conv_bias/gi/fp32/algos.h
浏览文件 @
54b5db17
#pragma once
#include "src/common/opr_delegate.h"
#include "src/fallback/conv_bias/opr_impl.h"
#include "src/fallback/matrix_mul/opr_impl.h"
...
...
@@ -249,6 +250,26 @@ public:
MEGDNN_DECL_ALGO_TYPE
(
GI_COMMON_DIRECT_NCHW_NCHW44_FP32
)
};
//! "Agent" algo for fp32 NCHW→NCHW44 convolution: instead of a dedicated
//! kernel, it relayouts the NCHW44 weights/bias to plain NCHW, delegates to
//! the fastest available NCHW ConvBias algo, then relayouts the result back
//! to NCHW44 (see f32_direct_nchw_nchw44_agent_algo.cpp for the kernels).
class ConvBiasImpl::AlgoF32DirectNCHWNCHW44AGENT final : public AlgoBase {
    //! Build the kernel list for \p param (relayout + delegated conv + relayout).
    SmallVector<NCBKern> get_kimpls(const NCBKernSizeParam& param) const;

public:
    AlgoF32DirectNCHWNCHW44AGENT(){};
    //! Pure delegation, no randomized tiling: results are reproducible.
    AlgoAttribute attribute() const override { return AlgoAttribute::REPRODUCIBLE; }
    const char* name() const override { return "F32_CONV_AGENT_NCHW_NCHW44"; }
    //! Whether this algo can handle \p param (fp32, NCHW44 output format,
    //! dense group-1 conv, limited kernel/stride set — see is_usable()).
    bool usable(const NCBKernSizeParam& param, AlgoSelectionStrategy algo_selection_strategy)
            const override;
    //! Total workspace: relayouted weights + bias + delegated-op workspace
    //! + intermediate NCHW destination buffer.
    size_t get_workspace(const NCBKernSizeParam& param) const override;
    virtual SmallVector<NCBKern> dispatch_kerns(const NCBKernSizeParam& param) const override;
    ConvAlgoTypePack get_algo_type() const override {
        return {AlgoDataType::FLOAT32, AlgoCategory::DIRECT};
    }
    MEGDNN_DECL_ALGO_TYPE(GI_COMMON_DIRECT_NCHW_NCHW44_AGENT_FP32)
};
class
ConvBiasImpl
::
AlgoF32ChannelWiseNCHW44
final
:
public
AlgoBase
{
SmallVector
<
NCBKern
>
get_kimpls
(
const
NCBKernSizeParam
&
param
)
const
;
...
...
dnn/src/fallback/conv_bias/gi/fp32/f32_direct_nchw_nchw44_agent_algo.cpp
0 → 100644
浏览文件 @
54b5db17
#include "megdnn/opr_param_defs.h"
#include "megdnn/oprs.h"
#include "src/common/nchw_nchwxx_valid.h"
#include "src/common/opr_delegate.h"
#include "src/fallback/conv_bias/gi/fp32/algos.h"
#include "src/fallback/elemwise_helper/elemwise_op.h"
#include "midout.h"
using
namespace
megdnn
;
using
namespace
fallback
;
MIDOUT_DECL
(
megdnn_fallback_conv_bias_fp32_nchw_nchw44_agent
)
namespace
{
//! Build the param for the delegated plain-NCHW ConvBias operator, copying
//! padding/stride/dilation and nonlinearity from the NCHW44 size-param \p p.
param::ConvBias get_param_convbias(const ConvBiasImpl::NCBKernSizeParam& p) {
    const auto& fm = p.filter_meta;
    //! should_flip distinguishes true convolution from cross correlation
    const auto conv_mode = fm.should_flip ? param::ConvBias::Mode::CONVOLUTION
                                          : param::ConvBias::Mode::CROSS_CORRELATION;
    return param::ConvBias{
            p.nonlineMode,
            conv_mode,
            param::ConvBias::Sparse::DENSE,
            ConvBias::Param::Format::NCHW,
            fm.padding[0],
            fm.padding[1],
            fm.stride[0],
            fm.stride[1],
            fm.dilation[0],
            fm.dilation[1],
            megdnn::param::ConvBias::ComputeMode::DEFAULT};
}
//! Compute every layout the agent algo needs, in a fixed order that the
//! callers index into directly:
//!   [0] src (NCHW)                 [1] filter (OIHW, plain NCHW)
//!   [2] filter as stored (NCHW44)  [3] bias (NCHW, empty if no channel bias)
//!   [4] bias as stored (NCHW44)    [5] dst (NCHW intermediate)
//!   [6] dst as required (NCHW44)   [7] filter reshape target for relayout
//!   [8] bias reshape target        [9] dst reshape source for relayout
//! Do not reorder the returned array — dispatch_kerns/get_bundle rely on it.
TensorLayoutArray get_layouts(const ConvBiasImpl::NCBKernSizeParam& p) {
    UNPACK_CONV_NCB_KERN_SIZES(p);
    //! only the shape-defining vars are used below; silence the rest
    MEGDNN_MARK_USED_VAR(SH);
    MEGDNN_MARK_USED_VAR(SW);
    MEGDNN_MARK_USED_VAR(PH);
    MEGDNN_MARK_USED_VAR(PW);
    MEGDNN_MARK_USED_VAR(OW);
    MEGDNN_MARK_USED_VAR(OH);
    TensorLayout src_layout({N, IC, IH, IW}, p.src_type);
    //! 44 filter to chw
    TensorLayout filter_layout44({OC / 4, FH, FW, IC, 4}, p.filter_type);
    TensorLayout filter_layout_reshape({OC / 4, 4, IC, FH, FW}, p.filter_type);
    TensorLayout filter_layout({OC, IC, FH, FW}, p.filter_type);
    //! bias layouts stay empty (ndim == 0) unless a channel bias is present
    TensorLayout bias_layout44{{}, p.bias_type};
    TensorLayout bias_layout{{}, p.bias_type};
    TensorLayout bias_layout_reshape{{}, p.bias_type};
    if (p.bias_mode == BiasMode::BROADCAST_CHANNEL_BIAS) {
        bias_layout44 = TensorLayout({1, OC / 4, 1, 1, 4}, p.bias_type);
        bias_layout_reshape = TensorLayout({1, OC / 4, 4, 1, 1}, p.bias_type);
        bias_layout = TensorLayout({1, OC, 1, 1}, p.bias_type);
    }
    //! chw dst to 44
    TensorLayout dst_layout = TensorLayout({N, OC, OH, OW}, p.dst_type);
    TensorLayout dst_layout_reshape = TensorLayout({N, OC / 4, 4, OH, OW}, p.dst_type);
    TensorLayout dst_layout44 = TensorLayout({N, OC / 4, OH, OW, 4}, p.dst_type);
    return {src_layout,     filter_layout,         filter_layout44,
            bias_layout,    bias_layout44,         dst_layout,
            dst_layout44,   filter_layout_reshape, bias_layout_reshape,
            dst_layout_reshape};
}
//! Compute the workspace bundle for the agent algo. Slots:
//!   [0] NCHW-relayouted weights
//!   [1] NCHW-relayouted bias
//!   [2] workspace of the delegated NCHW conv_bias operator
//!   [3] intermediate NCHW destination (relayouted back to NCHW44 at the end)
//! \p conv_bias_op also gets its param assigned here as a side effect, so the
//! subsequent get_workspace_in_bytes query sees the delegated NCHW config.
static WorkspaceBundle get_bundle(
        const ConvBiasImpl::NCBKernSizeParam& param,
        const std::unique_ptr<ConvBias>& conv_bias_op) {
    auto layouts = get_layouts(param);
    auto src_layout = layouts[0];
    auto filter_layout = layouts[1];
    auto bias_layout = layouts[3];
    auto dst_layout = layouts[5];
    size_t weight_relayout_workspace = filter_layout.span().dist_byte();
    size_t bias_relayout_workspace = bias_layout.span().dist_byte();
    conv_bias_op->param() = get_param_convbias(param);
    auto dummy = TensorLayout();
    auto conv_workspace = conv_bias_op->get_workspace_in_bytes(
            src_layout, filter_layout, bias_layout, dummy, dst_layout, nullptr);
    auto conv_dst_workspace = dst_layout.span().dist_byte();
    return {nullptr,
            {weight_relayout_workspace, bias_relayout_workspace, conv_workspace,
             conv_dst_workspace}};
}
};
// namespace
namespace
{
//! Whether the agent algo supports the given problem.
//! Requirements: fp32 everywhere, NCHW44 format, identity/relu/sigmoid/hswish
//! nonlinearity, dense group-1 conv with icpg < 4 and ocpg a multiple of 4,
//! square 2/3/5/7 kernels, no dilation, equal strides of 1 or 2, no flipped
//! filter, and no full (non-broadcast) bias.
inline bool is_usable(
        const DTypeEnum src_dtype, const DTypeEnum filter_dtype,
        const DTypeEnum dst_dtype,
        const ConvolutionBase<param::Convolution>::CanonizedFilterMeta& fm,
        const BiasMode bias_mode, const param::ConvBias::NonlineMode nonline_mode) {
    bool ok_type =
            (src_dtype == DTypeEnum::Float32 && filter_dtype == DTypeEnum::Float32 &&
             dst_dtype == DTypeEnum::Float32) &&
            (fm.format == param::Convolution::Format::NCHW44);
    bool ok_nonline = nonline_mode == param::ConvBias::NonlineMode::IDENTITY ||
                      nonline_mode == param::ConvBias::NonlineMode::RELU ||
                      nonline_mode == param::ConvBias::NonlineMode::SIGMOID ||
                      nonline_mode == param::ConvBias::NonlineMode::H_SWISH;
    bool ok_src_dst =
            fm.icpg < 4 && (fm.ocpg % 4 == 0 && fm.ocpg >= 4) && fm.group == 1;
    bool ok_filter = fm.spatial_ndim == 2 && fm.spatial[0] == fm.spatial[1] &&
                     (fm.spatial[0] == 2 || fm.spatial[0] == 3 ||
                      fm.spatial[0] == 5 || fm.spatial[0] == 7);
    //! strides must be equal, and either 1 or 2. (The original compared
    //! stride[1] against 2; equivalent given the equality check, but the
    //! index is normalized to stride[0] here for clarity.)
    bool ok_slide = fm.dilation[0] == 1 && fm.dilation[1] == 1 &&
                    fm.stride[0] == fm.stride[1] &&
                    (fm.stride[0] == 1 || fm.stride[0] == 2);
    bool ok_conv = !fm.should_flip && bias_mode != BiasMode::BIAS;
    bool available =
            ok_type && ok_nonline && ok_src_dst && ok_filter && ok_slide && ok_conv;
    return available;
}
};
// namespace
//! Thin forwarding wrapper: extract the dtype enums and filter metadata from
//! \p param and defer the actual check to the file-local is_usable().
bool ConvBiasImpl::AlgoF32DirectNCHWNCHW44AGENT::usable(
        const NCBKernSizeParam& param, AlgoSelectionStrategy) const {
    const auto src_enum = param.src_type.enumv();
    const auto filter_enum = param.filter_type.enumv();
    const auto dst_enum = param.dst_type.enumv();
    return is_usable(
            src_enum, filter_enum, dst_enum, param.filter_meta, param.bias_mode,
            param.nonlineMode);
}
//! Total workspace in bytes: create a throwaway delegated ConvBias operator
//! so get_bundle() can query its workspace, then sum all bundle slots.
size_t ConvBiasImpl::AlgoF32DirectNCHWNCHW44AGENT::get_workspace(
        const NCBKernSizeParam& param) const {
    MIDOUT_BEGIN(
            megdnn_fallback_conv_bias_fp32_nchw_nchw44_agent,
            midout_iv("AlgoF32DirectNCHWNCHW44AGENT::get_workspace"_hash)) {
        auto delegated_op = param.handle->create_operator<ConvBias>();
        const auto bundle = get_bundle(param, delegated_op);
        return bundle.total_size_in_bytes();
    }
    MIDOUT_END();
    //! unreachable when midout is enabled for this region
    return 0;
}
//! Build the three-phase kernel pipeline:
//!   1. filter_and_bias_dimshuffle — relayout NCHW44 weights/bias into plain
//!      NCHW copies in workspace slots 0/1;
//!   2. the delegated NCHW conv_bias kernels, re-pointed at the workspace
//!      buffers (slots 0-3);
//!   3. dest_dimshuffle — relayout the NCHW result (slot 3) back to the
//!      caller's NCHW44 dst tensor.
//! The phases communicate only through the workspace bundle, whose slot
//! layout is fixed by get_bundle().
SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::AlgoF32DirectNCHWNCHW44AGENT::
        dispatch_kerns(const NCBKernSizeParam& k_param) const {
    SmallVector<ConvBiasImpl::NCBKern> ret_kerns;
    MIDOUT_BEGIN(
            megdnn_fallback_conv_bias_fp32_nchw_nchw44_agent,
            midout_iv("AlgoF32DirectNCHWNCHW44AGENT::dispatch_kerns"_hash)) {
        //! phase 1: copy NCHW44-laid-out weights/bias into NCHW workspace
        auto filter_and_bias_dimshuffle = [](const NCBKernParam& kern_param,
                                             const NCBKernIndex&) {
            auto layouts = get_layouts(kern_param);
            auto filter_layout_44 = layouts[2];
            auto bias_layout44 = layouts[4];
            auto filter_layout_reshape = layouts[7];
            auto bias_layout_reshape = layouts[8];
            auto conv_bias_op = kern_param.handle->create_operator<ConvBias>();
            auto bundle = get_bundle(kern_param, conv_bias_op);
            bundle.set(kern_param.workspace_ptr);
            auto weight_ws = bundle.get(0);
            auto bias_ws = bundle.get(1);
            //! relayout bias and weight
            TensorND chw_weight_t = TensorND(weight_ws, filter_layout_reshape);
            //! dimshuffle turns the stored (OC/4, FH, FW, IC, 4) view into
            //! (OC/4, 4, IC, FH, FW) so the relayout emits plain OIHW data
            TensorND weight44_t = TensorND(
                    kern_param.filter_ptr.get_ptr(),
                    filter_layout_44.dimshuffle({0, 4, 3, 1, 2}));
            auto relayout_op = inplace_cpu_handle()->create_operator<Relayout>();
            relayout_op->exec(weight44_t, chw_weight_t);
            TensorND chw_bias_t = TensorND(bias_ws, bias_layout_reshape);
            //! ndim == 0 means no broadcast-channel bias (see get_layouts)
            if (bias_layout44.ndim != 0) {
                TensorND bias44_t = TensorND(
                        kern_param.bias_ptr.get_ptr(),
                        bias_layout44.dimshuffle({0, 1, 4, 2, 3}));
                relayout_op->exec(bias44_t, chw_bias_t);
            }
        };
        //! single-invocation kernel: runs once before the conv kernels
        ret_kerns.push_back({filter_and_bias_dimshuffle, {1}});
        //! phase 2: pick the best NCHW conv_bias algo heuristically and wrap
        //! each of its kernels so they read/write the workspace buffers
        auto do_agent_conv = [&ret_kerns, &k_param]() {
            auto layouts = get_layouts(k_param);
            auto src_layout = layouts[0];
            auto filter_layout = layouts[1];
            auto bias_layout = layouts[3];
            auto dst_layout = layouts[5];
            //! do chw conv
            auto conv_bias_op = k_param.handle->create_operator<ConvBias>();
            conv_bias_op->param() = get_param_convbias(k_param);
            auto dummy_z = TensorND();
            //! no workspace limit: workspace was already budgeted in get_bundle
            auto&& conv_bias_algo =
                    static_cast<ConvBiasImpl*>(conv_bias_op.get())
                            ->get_algorithm_heuristic(
                                    src_layout, filter_layout, bias_layout,
                                    dummy_z.layout, dst_layout,
                                    std::numeric_limits<size_t>::max(),
                                    AlgoAttribute::DEFAULT, AlgoAttribute::DEFAULT);
            //! the delegated algo must see an NCHW-format size-param
            auto new_param = k_param;
            new_param.filter_meta.format = ConvBias::Param::Format::NCHW;
            auto&& conv_bias_kerns =
                    static_cast<AlgoBase*>(conv_bias_algo)->dispatch_kerns(new_param);
            for (size_t i = 0; i < conv_bias_kerns.size(); i++) {
                auto&& kernel = conv_bias_kerns[i];
                //! capture kernel by value: conv_bias_kerns dies with this lambda
                auto run = [kernel](const NCBKernParam& p,
                                    const NCBKernIndex& ncb_index) {
                    auto conv_bias_op = p.handle->create_operator<ConvBias>();
                    auto bundle = get_bundle(p, conv_bias_op);
                    bundle.set(p.workspace_ptr);
                    auto weight_ws = bundle.get(0);
                    auto bias_ws = bundle.get(1);
                    auto chw_conv_ws = bundle.get(2);
                    auto chw_conv_ws_size = bundle.get_size(2);
                    auto chw_conv_dst_ws = bundle.get(3);
                    //! redirect the delegated kernel at the relayouted
                    //! weights/bias and the intermediate NCHW dst buffer
                    auto param = p;
                    param.filter_ptr = weight_ws;
                    param.bias_ptr = bias_ws;
                    param.dst_ptr = chw_conv_dst_ws;
                    param.workspace_ptr = chw_conv_ws;
                    param.workspace_size = chw_conv_ws_size;
                    kernel.kern(param, {ncb_index.thread_id, ncb_index.ndrange_id});
                };
                ret_kerns.push_back({run, kernel.global_size});
            }
        };
        do_agent_conv();
        //! phase 3: relayout the NCHW result back into the NCHW44 dst tensor
        auto dest_dimshuffle = [](const NCBKernParam& kern_param,
                                  const NCBKernIndex&) {
            auto param = kern_param;
            auto layouts = get_layouts(param);
            auto dst_layout44 = layouts[6];
            auto dst_layout_reshape = layouts[9];
            auto conv_bias_op = kern_param.handle->create_operator<ConvBias>();
            auto bundle = get_bundle(kern_param, conv_bias_op);
            bundle.set(kern_param.workspace_ptr);
            auto chw_conv_dst_ws = bundle.get(3);
            //! relayout dst to dst44 tensor
            TensorND chw44_dst_t =
                    TensorND(kern_param.dst_ptr.get_ptr(), dst_layout44);
            //! NOTE(review): chw_dst_t is never used — the exec below builds
            //! its source TensorND inline; looks like leftover scaffolding.
            TensorND chw_dst_t = TensorND(chw_conv_dst_ws, dst_layout_reshape);
            auto relayout_op = inplace_cpu_handle()->create_operator<Relayout>();
            relayout_op->exec(
                    {chw_conv_dst_ws,
                     dst_layout_reshape.dimshuffle({0, 1, 3, 4, 2})},
                    chw44_dst_t);
        };
        ret_kerns.push_back({dest_dimshuffle, {1}});
        return ret_kerns;
    }
    MIDOUT_END();
}
// vim: syntax=cpp.doxygen
dnn/src/fallback/conv_bias/gi/fp32/f32_direct_nchw_nchw44_algo.cpp
浏览文件 @
54b5db17
/**
* \file
dnn/src/fallback/conv_bias/gi/fp32/f32_direct_nchw_nchw44_algo.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied.
*/
#include "megdnn/oprs.h"
#include "src/common/nchw_nchwxx_valid.h"
#include "src/common/opr_delegate.h"
...
...
dnn/src/fallback/conv_bias/opr_impl.cpp
浏览文件 @
54b5db17
...
...
@@ -84,7 +84,8 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj {
AlgoBase
::
Mapper
m_all_algos_map
;
SmallVector
<
fallback
::
ConvBiasImpl
::
AlgoBase
*>
m_gi_winograd_algos
;
AlgoF32DirectNCHWNCHW44
f32_direct_stride2_nchw_nchw44
;
AlgoF32DirectNCHWNCHW44
f32_nchw_nchw44
;
AlgoF32DirectNCHWNCHW44AGENT
f32_nchw_nchw44_agent
;
AlgoF32ChannelWiseNCHW44
f32_chanel_wise_nchw44
;
AlgoF32DirectNCHW44
f32_direct_nchw44
;
...
...
@@ -94,8 +95,17 @@ class ConvBiasImpl::AlgoPack : NonCopyableObj {
public:
AlgoPack
()
{
// fallback gi fp32 algo
m_all_algos
.
emplace_back
(
&
f32_direct_stride2_nchw_nchw44
);
//! fallback gi fp32 algo
//! now f32_nchw_nchw44_agent is faster than f32_nchw_nchw44
//! on x86 and rvv platform, so we adjust heuristic order.
#if MEGDNN_AARCH64 || MEGDNN_ARMV7
m_all_algos
.
emplace_back
(
&
f32_nchw_nchw44
);
m_all_algos
.
emplace_back
(
&
f32_nchw_nchw44_agent
);
#else
m_all_algos
.
emplace_back
(
&
f32_nchw_nchw44_agent
);
m_all_algos
.
emplace_back
(
&
f32_nchw_nchw44
);
#endif
m_all_algos
.
emplace_back
(
&
f32_chanel_wise_nchw44
);
m_all_algos
.
emplace_back
(
&
f32_direct_nchw44
);
m_all_algos
.
emplace_back
(
&
f32_direct_stride1
);
...
...
@@ -471,7 +481,8 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
param
().
compute_mode
,
nr_threads
,
reinterpret_cast
<
const
ConvolutionForward
::
PreprocessedFilter
*>
(
preprocessed_filter
)},
preprocessed_filter
),
handle
()},
bias
.
dtype
,
bias
.
stride
[
0
],
bias_mode
,
...
...
@@ -491,6 +502,7 @@ ConvBiasImpl::NCBKernParam ConvBiasImpl::make_ncb_kern_param(
ret
.
dst_ptr
=
dst
.
get_ref_ptr
();
ret
.
workspace_ptr
=
workspace
.
raw_ptr
;
ret
.
workspace_size
=
workspace
.
size
;
ret
.
handle
=
handle
();
return
ret
;
}
...
...
dnn/src/fallback/conv_bias/opr_impl.h
浏览文件 @
54b5db17
...
...
@@ -228,6 +228,7 @@ public:
GI_COMMON_DIRECT_STRD2_FP32
,
GI_COMMON_DIRECT_NCHW44_FP32
,
GI_COMMON_DIRECT_NCHW_NCHW44_FP32
,
GI_COMMON_DIRECT_NCHW_NCHW44_AGENT_FP32
,
GI_COMMON_CHWNWISE_NCHW44_F32
,
#if MEGDNN_X86
...
...
@@ -389,6 +390,7 @@ private:
class
AlgoF32DirectStride1
;
class
AlgoF32DirectStride2
;
class
AlgoF32DirectNCHWNCHW44
;
class
AlgoF32DirectNCHWNCHW44AGENT
;
class
AlgoF32ChannelWiseNCHW44
;
class
AlgoF32DirectNCHW44
;
...
...
dnn/src/fallback/convolution/opr_impl.cpp
浏览文件 @
54b5db17
...
...
@@ -242,7 +242,8 @@ ConvolutionImpl::NCBKernSizeParam ConvolutionImpl::make_ncb_kern_size_param(
{
dst
.
stride
[
0
],
dst
.
stride
[
1
],
dst
.
stride
[
2
],
dst
.
stride
[
3
]},
param
().
compute_mode
,
nr_threads
,
preprocessed_filter
};
preprocessed_filter
,
handle
()};
}
ConvolutionImpl
::
NCBKernParam
ConvolutionImpl
::
make_ncb_kern_param
(
...
...
dnn/src/fallback/convolution/opr_impl.h
浏览文件 @
54b5db17
...
...
@@ -101,6 +101,7 @@ public:
const
PreprocessedFilter
*
preprocessed_filter
;
//! get the data type category of the param for select the algo
AlgoDataType
deduce_algo_data_type
()
const
;
Handle
*
handle
;
};
//! memory param for kernels with non-contiguous batch
...
...
dnn/test/fallback/conv_bias.cpp
浏览文件 @
54b5db17
...
...
@@ -354,6 +354,31 @@ TEST_F(FALLBACK_MULTI_THREADS, CONVBIAS_GI_NCHW_NCHW44_F32_S1) {
handle
(),
"F32_CONV_NCHW_NCHW44"
);
}
//! Stride-2 agent-algo tests: identity nonlinearity, one bias mode per
//! instantiation, kernel sizes 2/3/5/7, against "F32_CONV_AGENT_NCHW_NCHW44".
#define CB(_MODE, _SUFFIX)                                                            \
    TEST_F(FALLBACK_MULTI_THREADS, CONVBIAS_GI_NCHW_NCHW44_F32_S2_AGENT_##_SUFFIX) {  \
        check_conv_bias(                                                              \
                conv_bias::get_nchw44_conv_bias_args(                                 \
                        {2, 3, 5, 7}, ONLY_IDENTITY_NLMODE, {_MODE}, 2, false, true), \
                handle(), "F32_CONV_AGENT_NCHW_NCHW44");                              \
    }
CB(megdnn::BiasMode::NO_BIAS, NO_BIAS);
CB(megdnn::BiasMode::BROADCAST_CHANNEL_BIAS, BROADCAST_CHANNEL_BIAS);
#undef CB
//! Stride-1 agent-algo tests: broadcast bias modes, one nonlinearity per
//! instantiation. NOTE(review): the test-name prefix says IDENTITY but the
//! macro is instantiated for RELU/H_SWISH/SIGMOID too — the suffix carries
//! the real mode; the prefix looks like a copy-paste leftover.
#define CB(_MODE, _SUFFIX)                                                        \
    TEST_F(FALLBACK_MULTI_THREADS,                                                \
           CONVBIAS_GI_NCHW_NCHW44_F32_S1_AGENT_IDENTITY_##_SUFFIX) {             \
        check_conv_bias(                                                          \
                conv_bias::get_nchw44_conv_bias_args(                             \
                        {2, 3, 5, 7}, {_MODE}, ONLY_BR_BIASMODE, 1, false, true), \
                handle(), "F32_CONV_AGENT_NCHW_NCHW44");                          \
    }
CB(param::ConvBias::NonlineMode::IDENTITY, IDENTITY);
CB(param::ConvBias::NonlineMode::RELU, RELU);
CB(param::ConvBias::NonlineMode::H_SWISH, H_SWISH);
CB(param::ConvBias::NonlineMode::SIGMOID, SIGMOID);
#undef CB
std
::
vector
<
conv_bias
::
TestArg
>
get_nchw44_channel_wise_args
(
std
::
vector
<
size_t
>
kernel
,
size_t
stride
,
bool
no_bias
,
bool
no_nonlinemode
,
bool
no_full_bias
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录