Commit b8b000db
Authored Jun 23, 2020 by Megvii Engine Team (MegEngine 天元 / MegEngine)

feat(dnn/fallback): fix fallback interface of weight preprocess

GitOrigin-RevId: ca860f487e2c1d1264ab2b3ce0a5515c383b8dcb
Parent: cf3a55ce
Showing 16 changed files with 625 additions and 156 deletions (+625, -156)
dnn/include/megdnn/oprs/nn.h               +9    -10
dnn/src/common/conv_bias.cpp               +7    -4
dnn/src/common/convolution.cpp             +3    -2
dnn/src/cuda/conv_bias/opr_impl.cpp        +2    -2
dnn/src/cuda/conv_bias/opr_impl.h          +4    -7
dnn/src/fallback/conv_bias/opr_impl.cpp    +102  -42
dnn/src/fallback/conv_bias/opr_impl.h      +49   -34
dnn/src/fallback/convolution/algos.cpp     +63   -3
dnn/src/fallback/convolution/algos.h       +14   -0
dnn/src/fallback/convolution/opr_impl.cpp  +96   -15
dnn/src/fallback/convolution/opr_impl.h    +46   -23
dnn/src/naive/conv_bias/opr_impl.cpp       +4    -3
dnn/src/naive/conv_bias/opr_impl.h         +1    -3
dnn/src/naive/convolution/convolution.cpp  +2    -2
dnn/src/naive/convolution/opr_impl.h       +1    -3
dnn/test/common/opr_proxy.h                +222  -3
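For orientation before reading the hunks: the new interface is meant to be driven in the order exercised by the test proxy added at the end of this diff — deduce the preprocessed-filter layouts, allocate tensors for them, run exec_preprocess once, then pass the resulting PreprocessedFilter into every exec call. A minimal sketch of that flow, assuming an already-created ConvolutionForward* opr, filled TensorND src/filter/dst, an algorithm chosen via execution_policy(), and an alloc_tensors helper like the one added in dnn/test/common/opr_proxy.h (the helper and WorkspaceWrapper are test-side utilities, not part of the operator API):

    // Sketch only: mirrors OprWeightPreprocessProxy<ConvolutionForward> below.
    SmallVector<TensorLayout> layouts = opr->deduce_preprocessed_filter_layout(
            src.layout, filter.layout, dst.layout);
    auto filter_storage = alloc_tensors(opr->handle(), layouts);
    ConvolutionForward::PreprocessedFilter preprocessed_filter{
            opr->execution_policy().algorithm, *filter_storage};

    // One-off weight preprocessing.
    WorkspaceWrapper pre_ws(opr->handle(),
                            opr->get_preprocess_workspace_in_bytes(
                                    src.layout, filter.layout, dst.layout));
    opr->exec_preprocess(src.layout, filter, dst.layout, &preprocessed_filter,
                         pre_ws.workspace());

    // Every subsequent forward run reuses the preprocessed weights.
    WorkspaceWrapper ws(opr->handle(),
                        opr->get_workspace_in_bytes(src.layout, filter.layout,
                                                    dst.layout,
                                                    &preprocessed_filter));
    opr->exec(src, filter, dst, &preprocessed_filter, ws.workspace());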
dnn/include/megdnn/oprs/nn.h  (view file @ b8b000db)
...
@@ -234,10 +234,10 @@ public:
             const TensorLayout& dst) = 0;

 protected:
-    CanonizedFilterMeta check_exec(const TensorLayout& src,
-                                   const TensorLayout& filter,
-                                   const TensorLayout& dst,
-                                   size_t workspace_in_bytes);
+    CanonizedFilterMeta check_exec(
+            const TensorLayout& src, const TensorLayout& filter,
+            const TensorLayout& dst, size_t workspace_in_bytes,
+            const PreprocessedFilter* preprocessed_filter);
 };
 using Convolution = ConvolutionForward;
...
@@ -408,12 +408,11 @@ public:
     static WinogradParam parse_winograd_name(const std::string& algo_name);

 protected:
-    CanonizedFilterMeta check_exec(const TensorLayout& src,
-                                   const TensorLayout& filter,
-                                   const TensorLayout& bias,
-                                   const TensorLayout& z,
-                                   const TensorLayout& dst,
-                                   size_t workspace_in_bytes);
+    CanonizedFilterMeta check_exec(
+            const TensorLayout& src, const TensorLayout& filter,
+            const TensorLayout& bias, const TensorLayout& z,
+            const TensorLayout& dst, size_t workspace_in_bytes,
+            const PreprocessedFilter* preprocessed_filter);
 };
 using ConvBias = ConvBiasForward;
...
dnn/src/common/conv_bias.cpp  (view file @ b8b000db)
...
@@ -32,7 +32,8 @@ void ConvBiasForward::deduce_layout(const TensorLayout& src,
 ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& bias, const TensorLayout& z,
-        const TensorLayout& dst, size_t workspace_in_bytes) {
+        const TensorLayout& dst, size_t workspace_in_bytes,
+        const PreprocessedFilter* preprocessed_filter) {
     if ((param().format == param::ConvBias::Format::NCHW_WINOGRAD ||
          param().format == param::ConvBias::Format::NCHW88_WINOGRAD ||
          param().format == param::ConvBias::Format::NCHW44_WINOGRAD) &&
...
@@ -82,9 +83,11 @@ ConvBiasForward::CanonizedFilterMeta ConvBiasForward::check_exec(
     auto ret = check_layout_fwd(src, filter, dst);
     megdnn_assert_contiguous(bias);
-    auto required_workspace_in_bytes =
-            get_workspace_in_bytes(src, filter, bias, z, dst, nullptr);
-    megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
+    auto required_workspace_in_bytes = get_workspace_in_bytes(
+            src, filter, bias, z, dst, preprocessed_filter);
+    megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes,
+                  "worksapce have size of %zu, but need %zu",
+                  workspace_in_bytes, required_workspace_in_bytes);
     if (bias.ndim != 0) {
         //! bias.layout == dst.layout failed, no assert information
         auto check_eq = [](const TensorLayout& bias, const TensorLayout& dst) {
...
dnn/src/common/convolution.cpp  (view file @ b8b000db)
...
@@ -1028,10 +1028,11 @@ void ConvolutionForward::deduce_layout(const TensorLayout& src,
 ConvolutionForward::CanonizedFilterMeta ConvolutionForward::check_exec(
         const TensorLayout& src, const TensorLayout& filter,
-        const TensorLayout& dst, size_t workspace_in_bytes) {
+        const TensorLayout& dst, size_t workspace_in_bytes,
+        const PreprocessedFilter* preprocessed_filter) {
     auto ret = check_layout_fwd(src, filter, dst);
     auto required_workspace_in_bytes =
-            get_workspace_in_bytes(src, filter, dst, nullptr);
+            get_workspace_in_bytes(src, filter, dst, preprocessed_filter);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
     return ret;
 }
...
dnn/src/cuda/conv_bias/opr_impl.cpp  (view file @ b8b000db)
...
@@ -25,10 +25,10 @@ namespace cuda {
 void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
                                _megdnn_tensor_in bias, _megdnn_tensor_in z,
                                _megdnn_tensor_out dst,
-                               const PreprocessedFilter*,
+                               const PreprocessedFilter* preprocessed_filter,
                                _megdnn_workspace workspace) {
     check_exec(src.layout, filter.layout, bias.layout, z.layout, dst.layout,
-               workspace.size);
+               workspace.size, preprocessed_filter);
     AlgoBase::ExecArgs args(this, src, filter, bias, z, dst, workspace);
     auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout,
                               z.layout, dst.layout);
...
dnn/src/cuda/conv_bias/opr_impl.h  (view file @ b8b000db)
...
@@ -52,13 +52,10 @@ public:
             const TensorLayout&, const TensorLayout&) override {
         return {};
     }
     void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
                          const TensorLayout&, const TensorLayout&,
                          const TensorLayout&, PreprocessedFilter*,
                          _megdnn_workspace) override {
         megdnn_throw("cuda conv_bias exec_preprocess has not implemeted yet");
     }
...
dnn/src/fallback/conv_bias/opr_impl.cpp  (view file @ b8b000db)
...
@@ -119,17 +119,22 @@ SmallVector<ConvBiasImpl::AlgoBase*> ConvBiasImpl::algo_pack() {
 bool ConvBiasImpl::is_naive_algo(ConvBiasImpl::Algorithm* algo) {
     return algo == nullptr || strcmp(algo->name(), "DEFAULT") == 0;
 }

+#define NCB_ALGO_FUNC(name, algo, param) \
+    static_cast<AlgoBase*>(algo)->name(this, param)
+
 void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
                         _megdnn_tensor_in bias, _megdnn_tensor_in z,
                         _megdnn_tensor_out dst,
                         const PreprocessedFilter* preprocessed_filter,
                         _megdnn_workspace workspace) {
     check_exec(src.layout, filter.layout, bias.layout, z.layout, dst.layout,
-               workspace.size);
-    auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace);
+               workspace.size, preprocessed_filter);
+    auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace,
+                                      preprocessed_filter);
     ConvBiasImpl::Algorithm* algo = get_algorithm(fparam, workspace.size);
     if (!is_naive_algo(algo) &&
-        ncb_algo_get_workspace(algo, fparam) <= workspace.size) {
+        NCB_ALGO_FUNC(get_workspace, algo, fparam) <= workspace.size) {
         exec_with_ncb_kern(fparam, algo);
     } else {
         naive::ConvBiasForwardImpl::exec(src, filter, bias, z, dst,
...
@@ -137,18 +142,71 @@ void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
     }
 }

+void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout,
+                                   _megdnn_tensor_in filter,
+                                   const TensorLayout& bias_layout,
+                                   const TensorLayout& z_layout,
+                                   const TensorLayout& dst_layout,
+                                   PreprocessedFilter* preprocessed_filter,
+                                   _megdnn_workspace workspace) {
+    //! exec_preprocess currently only support preprocess weights before exec,
+    //! src/dst/bias/z will be ignored, just set to nullptr
+    TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout},
+            bias{nullptr, bias_layout};
+    auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace,
+                                      preprocessed_filter);
+    ConvolutionImpl::Algorithm* algo = get_algorithm(fparam, workspace.size);
+    if (!is_naive_algo(algo) &&
+        NCB_ALGO_FUNC(get_preprocess_workspace, algo, fparam) <=
+                workspace.size) {
+        exec_preprocess_with_ncb_kern(fparam, algo);
+    } else {
+        naive::ConvBiasForwardImpl::exec_preprocess(
+                src_layout, filter, bias_layout, z_layout, dst_layout,
+                preprocessed_filter, workspace);
+    }
+}
+
 size_t ConvBiasImpl::get_workspace_in_bytes(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& bias, const TensorLayout& z,
         const TensorLayout& dst,
         const PreprocessedFilter* preprocessed_filter) {
-    auto fparam = make_ncb_kern_size_param(src, filter, bias, dst);
+    auto fparam = make_ncb_kern_size_param(src, filter, bias, dst,
+                                           preprocessed_filter);
     ConvBiasImpl::Algorithm* algo = get_algorithm(fparam);
     if (is_naive_algo(algo)) {
         return naive::ConvBiasForwardImpl::get_workspace_in_bytes(
                 src, filter, bias, z, dst, preprocessed_filter);
     } else {
-        return ncb_algo_get_workspace(algo, fparam);
+        return NCB_ALGO_FUNC(get_workspace, algo, fparam);
     }
 }

+size_t ConvBiasImpl::get_preprocess_workspace_in_bytes(
+        const TensorLayout& src, const TensorLayout& filter,
+        const TensorLayout& bias, const TensorLayout& z,
+        const TensorLayout& dst) {
+    auto fparam = make_ncb_kern_size_param(src, filter, bias, dst, nullptr);
+    Algorithm* algo = get_algorithm(fparam);
+    if (is_naive_algo(algo)) {
+        return naive::ConvBiasForwardImpl::get_preprocess_workspace_in_bytes(
+                src, filter, bias, z, dst);
+    } else {
+        return NCB_ALGO_FUNC(get_preprocess_workspace, algo, fparam);
+    }
+}
+
+SmallVector<TensorLayout> ConvBiasImpl::deduce_preprocessed_filter_layout(
+        const TensorLayout& src, const TensorLayout& filter,
+        const TensorLayout& bias, const TensorLayout& z,
+        const TensorLayout& dst) {
+    auto fparam = make_ncb_kern_size_param(src, filter, bias, dst, nullptr);
+    Algorithm* algo = get_algorithm(fparam);
+    if (is_naive_algo(algo)) {
+        return naive::ConvBiasForwardImpl::deduce_preprocessed_filter_layout(
+                src, filter, bias, z, dst);
+    } else {
+        return NCB_ALGO_FUNC(deduce_preprocessed_filter_layout, algo, fparam);
+    }
+}
+
...
@@ -156,7 +214,7 @@ std::vector<ConvBiasImpl::Algorithm*> ConvBiasImpl::get_all_algorithms(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& bias, const TensorLayout& z,
         const TensorLayout& dst) {
-    auto fparam = make_ncb_kern_size_param(src, filter, bias, dst);
+    auto fparam = make_ncb_kern_size_param(src, filter, bias, dst, nullptr);
     auto ret = get_all_algorithms_with_ncb(fparam);
     if (ret.empty()) {
         return naive::ConvBiasForwardImpl::get_all_algorithms(src, filter, bias,
...
@@ -170,7 +228,7 @@ ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm_heuristic(
         const TensorLayout& bias, const TensorLayout& z,
         const TensorLayout& dst, size_t workspace_limit_in_bytes,
         bool reproducible) {
-    auto fparam = make_ncb_kern_size_param(src, filter, bias, dst);
+    auto fparam = make_ncb_kern_size_param(src, filter, bias, dst, nullptr);
     auto result = get_algorithm_heuristic_with_ncb(
             fparam, workspace_limit_in_bytes, reproducible);
     if (result == nullptr) {
...
@@ -181,9 +239,25 @@ ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm_heuristic(
     return result;
 }

+ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm_heuristic_with_ncb(
+        const NCBKernSizeParam& param, size_t workspace_limit_in_bytes,
+        bool reproducible) {
+    for (auto i : get_all_algorithms_with_ncb(param)) {
+        size_t need_workspace = NCB_ALGO_FUNC(get_workspace, i, param);
+        if (static_cast<AlgoBase*>(i)->usable_reproducible(
+                    this, param, AlgoSelectionStrategy::HEURISTIC,
+                    reproducible) &&
+            need_workspace <= workspace_limit_in_bytes) {
+            return i;
+        }
+    }
+    return nullptr;
+}
+
 ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
         const TensorLayout& src, const TensorLayout& filter,
-        const TensorLayout& bias, const TensorLayout& dst) {
+        const TensorLayout& bias, const TensorLayout& dst,
+        const PreprocessedFilter* preprocessed_filter) {
     auto safe_u32 = [](size_t v) -> uint32_t {
         megdnn_assert(v <= std::numeric_limits<uint32_t>::max(),
                       "value too large: %zu", v);
...
@@ -258,7 +332,9 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
              {src.stride[0], src.stride[1], src.stride[2], src.stride[3]},
              {dst.stride[0], dst.stride[1], dst.stride[2], dst.stride[3]},
              param().compute_mode,
-             nr_threads},
+             nr_threads,
+             reinterpret_cast<const ConvolutionForward::PreprocessedFilter*>(
+                     preprocessed_filter)},
             param().output_block_size,
             format,
             bias.dtype,
...
@@ -269,10 +345,12 @@ ConvBiasImpl::NCBKernSizeParam ConvBiasImpl::make_ncb_kern_size_param(
 ConvBiasImpl::NCBKernParam ConvBiasImpl::make_ncb_kern_param(
         _megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_in bias,
-        _megdnn_tensor_out dst, _megdnn_workspace workspace) {
+        _megdnn_tensor_out dst, _megdnn_workspace workspace,
+        const PreprocessedFilter* preprocessed_filter) {
     NCBKernParam ret;
-    static_cast<NCBKernSizeParam&>(ret) = make_ncb_kern_size_param(
-            src.layout, filter.layout, bias.layout, dst.layout);
+    static_cast<NCBKernSizeParam&>(ret) =
+            make_ncb_kern_size_param(src.layout, filter.layout, bias.layout,
+                                     dst.layout, preprocessed_filter);
     ret.src_ptr = src.raw_ptr;
     ret.filter_ptr = filter.raw_ptr;
     ret.bias_ptr = bias.raw_ptr;
...
@@ -284,7 +362,7 @@ ConvBiasImpl::NCBKernParam ConvBiasImpl::make_ncb_kern_param(
 void ConvBiasImpl::exec_with_ncb_kern(const NCBKernParam& param,
                                       ConvBiasImpl::Algorithm* algo) {
-    auto ncb_kerns = ncb_algo_dispatch_kerns(algo, param);
+    auto ncb_kerns = NCB_ALGO_FUNC(dispatch_kerns, algo, param);
     for (auto&& kernel : ncb_kerns) {
         auto run = [kernel, param](size_t index, size_t thread_id) {
             CpuNDRange ndrange_id(kernel.global_size, index);
...
@@ -295,21 +373,17 @@ void ConvBiasImpl::exec_with_ncb_kern(const NCBKernParam& param,
     }
 }

-ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm_heuristic_with_ncb(
-        const NCBKernSizeParam& param, size_t workspace_limit_in_bytes,
-        bool reproducible) {
-    return ncb_algo_get_algorithm_heuristic(param, workspace_limit_in_bytes,
-                                            reproducible);
-}
-
-size_t ConvBiasImpl::ncb_algo_get_workspace(Algorithm* algo,
-                                            const NCBKernSizeParam& param) {
-    return static_cast<AlgoBase*>(algo)->get_workspace(this, param);
-}
-
-SmallVector<ConvBiasImpl::NCBKern> ConvBiasImpl::ncb_algo_dispatch_kerns(
-        Algorithm* algo, const NCBKernSizeParam& param) {
-    return static_cast<AlgoBase*>(algo)->dispatch_kerns(this, param);
+void ConvBiasImpl::exec_preprocess_with_ncb_kern(
+        const NCBKernParam& param, ConvBiasImpl::Algorithm* algo) {
+    auto ncb_kerns = NCB_ALGO_FUNC(dispatch_preprocess_kerns, algo, param);
+    for (auto&& kernel : ncb_kerns) {
+        auto run = [kernel, param](size_t index, size_t thread_id) {
+            CpuNDRange ndrange_id(kernel.global_size, index);
+            kernel.kern(param, {thread_id, ndrange_id});
+        };
+        static_cast<naive::HandleImpl*>(handle())->dispatch_kern(
+                run, kernel.global_size.total_size());
+    }
 }

 std::vector<ConvBiasImpl::Algorithm*> ConvBiasImpl::get_all_algorithms_with_ncb(
...
@@ -332,20 +406,6 @@ std::vector<ConvBiasImpl::Algorithm*> ConvBiasImpl::get_all_algorithms_with_ncb(
     return algos;
 }

-ConvBiasImpl::Algorithm* ConvBiasImpl::ncb_algo_get_algorithm_heuristic(
-        const NCBKernSizeParam& param, size_t workspace_limit_in_bytes,
-        bool reproducible) {
-    for (auto i : get_all_algorithms_with_ncb(param)) {
-        if (static_cast<AlgoBase*>(i)->usable_reproducible(
-                    this, param, AlgoSelectionStrategy::HEURISTIC,
-                    reproducible) &&
-            ncb_algo_get_workspace(i, param) <= workspace_limit_in_bytes) {
-            return i;
-        }
-    }
-    return nullptr;
-}
-
 ConvBiasImpl::Algorithm* ConvBiasImpl::get_algorithm(
         const NCBKernSizeParam& param, size_t workspace_size) {
     if (auto set = execution_policy().algorithm) {
...
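The NCB_ALGO_FUNC macro introduced above replaces the former ncb_algo_* virtual helpers with a direct static downcast of the stored Algorithm* to AlgoBase*. A hedged, self-contained toy illustration of that dispatch pattern (all names here are invented for the illustration and are not megdnn's; the real macro also forwards the operator via `this` as the first argument):

    #include <cstddef>

    // Every algorithm placed in the pack is constructed as an AlgoBase
    // subclass, so static_cast from the base Algorithm* recovers the
    // richer interface without a virtual forwarding layer.
    struct Algorithm {
        virtual ~Algorithm() = default;
    };
    struct AlgoBase : Algorithm {
        virtual size_t get_workspace(int param) const = 0;
    };

    // Simplified analogue of NCB_ALGO_FUNC (the `this` argument is dropped).
    #define TOY_ALGO_FUNC(name, algo, param) \
        static_cast<AlgoBase*>(algo)->name(param)

    struct AlgoToy final : AlgoBase {
        size_t get_workspace(int param) const override {
            return static_cast<size_t>(param) * 16;  // pretend workspace size
        }
    };

    size_t query_workspace(Algorithm* algo, int param) {
        // Same shape as the call sites in ConvBiasImpl::exec(): one macro
        // invocation instead of a dedicated ncb_algo_get_workspace() helper.
        return TOY_ALGO_FUNC(get_workspace, algo, param);
    }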
dnn/src/fallback/conv_bias/opr_impl.h  (view file @ b8b000db)
...
@@ -51,6 +51,25 @@ public:
               _megdnn_tensor_out dst, const PreprocessedFilter*,
               _megdnn_workspace workspace) override;

+    void exec_preprocess(const TensorLayout& src_layout,
+                         _megdnn_tensor_in filter,
+                         const TensorLayout& bias_layout,
+                         const TensorLayout& z_layout,
+                         const TensorLayout& dst_layout,
+                         PreprocessedFilter* preprocessed_filter,
+                         _megdnn_workspace workspace) override;
+
+    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+            const TensorLayout& src, const TensorLayout& filter,
+            const TensorLayout& bias, const TensorLayout& z,
+            const TensorLayout& dst) override;
+
+    size_t get_preprocess_workspace_in_bytes(const TensorLayout& src,
+                                             const TensorLayout& filter,
+                                             const TensorLayout& bias,
+                                             const TensorLayout& z,
+                                             const TensorLayout& dst) override;
+
     //! implemented by get_workspace_with_ncb()
     size_t get_workspace_in_bytes(const TensorLayout& src,
                                   const TensorLayout& filter,
...
@@ -198,6 +217,23 @@ public:
         virtual SmallVector<NCBKern> dispatch_kerns(
                 ConvBiasImpl* opr, const NCBKernSizeParam& param) const = 0;

+        virtual SmallVector<NCBKern> dispatch_preprocess_kerns(
+                ConvBiasImpl*, const NCBKernSizeParam&) const {
+            return {};
+        };
+
+        //! get the layouts of weight_prerocess dst
+        virtual SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+                ConvBiasImpl*, const NCBKernSizeParam&) const {
+            return {};
+        };
+
+        //! get the workspace when weight_prerocess
+        virtual size_t get_preprocess_workspace(
+                ConvBiasImpl*, const NCBKernSizeParam&) const {
+            return 0_z;
+        };
+
         //! Temporarily used to identify whether the matmul algorithm is
         //! is_preferred.
         virtual bool is_preferred(ConvBiasImpl*,
...
@@ -219,40 +255,19 @@ public:
     virtual SmallVector<AlgoBase*> algo_pack();

 protected:
-    //! default impl calls ncb_algo_dispatch_kern()
     virtual void exec_with_ncb_kern(const NCBKernParam& param,
                                     ConvBiasImpl::Algorithm* algo);

-    //! default impl calls ncb_algo_get_all_algorithms()
+    virtual void exec_preprocess_with_ncb_kern(const NCBKernParam& param,
+                                               Algorithm* algo);
+
     virtual std::vector<Algorithm*> get_all_algorithms_with_ncb(
             const NCBKernSizeParam& param);

-    //! default impl calls ncb_algo_get_algorithm_heuristic()
     virtual Algorithm* get_algorithm_heuristic_with_ncb(
             const NCBKernSizeParam& param, size_t workspace_limit_in_bytes,
             bool reproducible = false);

-    /**
-     * \brief get kernel pointer for non-contiguous batch kernel or
-     * simply conv bias kernel.
-     *
-     * whether the kernel processing batch 1-group is decided by the
-     * algo.
-     */
-    virtual SmallVector<NCBKern> ncb_algo_dispatch_kerns(
-            Algorithm* algo, const NCBKernSizeParam& param);
-
-    virtual size_t ncb_algo_get_workspace(Algorithm* algo,
-                                          const NCBKernSizeParam& param);
-
-    /*!
-     * the default impl iterates over all ncb_algo_get_all_algorithms()
-     * and return the first one whose workspace does not exceed the limit.
-     */
-    virtual Algorithm* ncb_algo_get_algorithm_heuristic(
-            const NCBKernSizeParam& param, size_t workspace_limit_in_bytes,
-            bool reproducible = false);
-
     const char* get_algorithm_set_name() const override;

 private:
...
@@ -276,16 +291,16 @@ private:
             const NCBKernSizeParam& param,
             size_t workspace_size = std::numeric_limits<size_t>::max());

-    NCBKernSizeParam make_ncb_kern_size_param(const TensorLayout& src,
-                                              const TensorLayout& filter,
-                                              const TensorLayout& bias,
-                                              const TensorLayout& dst);
-
-    NCBKernParam make_ncb_kern_param(_megdnn_tensor_in src,
-                                     _megdnn_tensor_in filter,
-                                     _megdnn_tensor_in bias,
-                                     _megdnn_tensor_out dst,
-                                     _megdnn_workspace workspace);
+    NCBKernSizeParam make_ncb_kern_size_param(
+            const TensorLayout& src, const TensorLayout& filter,
+            const TensorLayout& bias, const TensorLayout& dst,
+            const PreprocessedFilter* preprocessed_filter);
+
+    NCBKernParam make_ncb_kern_param(
+            _megdnn_tensor_in src, _megdnn_tensor_in filter,
+            _megdnn_tensor_in bias, _megdnn_tensor_out dst,
+            _megdnn_workspace workspace,
+            const PreprocessedFilter* preprocessed_filter);
 };

 }  // namespace fallback
...
dnn/src/fallback/convolution/algos.cpp  (view file @ b8b000db)
...
@@ -376,7 +376,67 @@ size_t ConvolutionImpl::AlgoDefault::get_workspace(
     return get_bundle(param).total_size_in_bytes();
 }

-//! Return the implment kernel
+size_t ConvolutionImpl::AlgoDefault::get_preprocess_workspace(
+        ConvolutionImpl*, const NCBKernSizeParam& param) const {
+    ::ConvBiasImpl::NCBKernSizeParam conv_bias_param =
+            init_convbias_opr_and_param(m_conv_bias_opr, param);
+    m_conv_bias_opr->execution_policy() = {m_algorithm};
+    return m_algorithm->get_preprocess_workspace(m_conv_bias_opr,
+                                                 conv_bias_param);
+}
+
+SmallVector<TensorLayout>
+ConvolutionImpl::AlgoDefault::deduce_preprocessed_filter_layout(
+        ConvolutionImpl*, const NCBKernSizeParam& param) const {
+    ::ConvBiasImpl::NCBKernSizeParam conv_bias_param =
+            init_convbias_opr_and_param(m_conv_bias_opr, param);
+    m_conv_bias_opr->execution_policy() = {m_algorithm};
+    return m_algorithm->deduce_preprocessed_filter_layout(m_conv_bias_opr,
+                                                          conv_bias_param);
+}
+
+//! Return the implement preprocess kernel
+SmallVector<ConvolutionImpl::NCBKern>
+ConvolutionImpl::AlgoDefault::get_preprocess_kimpl(
+        ::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo,
+        const NCBKernSizeParam& param) {
+    MIDOUT_BEGIN(megdnn_fallback_conv, midout_iv("get_preprocess_kimpl"_hash)) {
+        // construct the conv_bias kern param
+        ::ConvBiasImpl::NCBKernParam conv_bias_param;
+        ::ConvBiasImpl::NCBKernSizeParam conv_bias_size_param =
+                init_convbias_opr_and_param(conv_bias_opr, param);
+        static_cast<::ConvBiasImpl::NCBKernSizeParam&>(conv_bias_param) =
+                conv_bias_size_param;
+        auto conv_bias_preprocess_kerns =
+                algo->dispatch_preprocess_kerns(conv_bias_opr, conv_bias_param);
+        SmallVector<ConvolutionImpl::NCBKern> convolution_preprocess_kerns;
+
+        //! Set the conv_bias param using convolution param
+        auto set_copy_param_filter_workspace_ptr =
+                [](const NCBKernParam& conv_param,
+                   ::ConvBiasImpl::NCBKernParam& copied_param) {
+                    copied_param.filter_ptr = conv_param.filter_ptr;
+                    copied_param.workspace_ptr = conv_param.workspace_ptr;
+                    copied_param.workspace_size = conv_param.workspace_size;
+                };
+        for (size_t i = 0; i < conv_bias_preprocess_kerns.size(); i++) {
+            auto kernel = conv_bias_preprocess_kerns[i];
+            //! If the kerenl batch parallel
+            auto run = [=](const NCBKernParam& p,
+                           const NCBKernIndex& ncb_index) {
+                auto copy_param = conv_bias_param;
+                set_copy_param_filter_workspace_ptr(p, copy_param);
+                kernel.kern(copy_param,
+                            {ncb_index.thread_id, ncb_index.ndrange_id});
+            };
+            convolution_preprocess_kerns.push_back({run, kernel.global_size});
+        }
+        return convolution_preprocess_kerns;
+    }
+    MIDOUT_END();
+}
+
+//! Return the implement kernel
 SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl(
         ::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo,
         const NCBKernSizeParam& param) {
...
@@ -392,7 +452,7 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl(
         SmallVector<ConvolutionImpl::NCBKern> convolution_kerns;

         //! Set the conv_bias param using convolution param
-        auto set_copy_param_run_time_address =
+        auto set_copy_param_compute_address =
                 [](const NCBKernParam& conv_param,
                    ::ConvBiasImpl::NCBKernParam& copied_param) {
                     copied_param.src_ptr = conv_param.src_ptr;
...
@@ -407,7 +467,7 @@ SmallVector<ConvolutionImpl::NCBKern> ConvolutionImpl::AlgoDefault::get_kimpl(
             auto run = [=](const NCBKernParam& p,
                            const NCBKernIndex& ncb_index) {
                 auto copy_param = conv_bias_param;
-                set_copy_param_run_time_address(p, copy_param);
+                set_copy_param_compute_address(p, copy_param);
                 kernel.kern(copy_param,
                             {ncb_index.thread_id, ncb_index.ndrange_id});
             };
...
dnn/src/fallback/convolution/algos.h  (view file @ b8b000db)
...
@@ -110,6 +110,9 @@ class ConvolutionImpl::AlgoDefault final : public AlgoBase {
     static SmallVector<NCBKern> get_kimpl(ConvBiasImpl* conv_bias_opr,
                                           ConvBiasImpl::AlgoBase* algo,
                                           const NCBKernSizeParam& param);
+    static SmallVector<NCBKern> get_preprocess_kimpl(
+            ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase* algo,
+            const NCBKernSizeParam& param);

 public:
     AlgoDefault(fallback::ConvBiasImpl* conv_bias_opr, ConvBiasImpl::AlgoBase*);
...
@@ -121,6 +124,17 @@ public:
     size_t get_workspace(ConvolutionImpl* opr,
                          const NCBKernSizeParam& param) const override;

+    size_t get_preprocess_workspace(ConvolutionImpl*,
+                                    const NCBKernSizeParam&) const override;
+
+    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+            ConvolutionImpl*, const NCBKernSizeParam&) const override;
+
+    SmallVector<NCBKern> dispatch_preprocess_kern(
+            ConvolutionImpl*, const NCBKernSizeParam& param) const override {
+        return get_preprocess_kimpl(m_conv_bias_opr, m_algorithm, param);
+    }
+
     SmallVector<NCBKern> dispatch_kern(
             ConvolutionImpl* /*opr*/,
             const NCBKernSizeParam& param) const override {
...
dnn/src/fallback/convolution/opr_impl.cpp  (view file @ b8b000db)
...
@@ -80,14 +80,19 @@ SmallVector<ConvolutionImpl::AlgoBase*> ConvolutionImpl::algo_pack() {
 bool ConvolutionImpl::is_naive_algo(ConvolutionImpl::Algorithm* algo) {
     return algo == nullptr || strcmp(algo->name(), "DEFAULT") == 0;
 }

+#define NCB_ALGO_FUNC(name, algo, param) \
+    static_cast<AlgoBase*>(algo)->name(this, fparam)
+
 void ConvolutionImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
                            _megdnn_tensor_out dst,
                            const PreprocessedFilter* preprocessed_filter,
                            _megdnn_workspace workspace) {
-    auto fparam = make_ncb_kern_param(src, filter, dst, workspace);
+    auto fparam = make_ncb_kern_param(src, filter, dst, preprocessed_filter,
+                                      workspace);
     ConvolutionImpl::Algorithm* algo = get_algorithm(fparam, workspace.size);
     if (!is_naive_algo(algo) &&
-        ncb_algo_get_workspace(algo, fparam) <= workspace.size) {
+        NCB_ALGO_FUNC(get_workspace, algo, fparam) <= workspace.size) {
         exec_with_ncb_kern(fparam, algo);
     } else {
         naive::ConvolutionForwardImpl::exec(src, filter, dst,
...
@@ -95,24 +100,73 @@ void ConvolutionImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
     }
 }

+void ConvolutionImpl::exec_preprocess(const TensorLayout& src_layout,
+                                      _megdnn_tensor_in filter,
+                                      const TensorLayout& dst_layout,
+                                      PreprocessedFilter* preprocessed_filter,
+                                      _megdnn_workspace workspace) {
+    //! exec_preprocess currently only support preprocess weights before exec,
+    //! src/dst will be ignored, just set to nullptr
+    TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout};
+    auto fparam = make_ncb_kern_param(src, filter, dst, preprocessed_filter,
+                                      workspace);
+    ConvolutionImpl::Algorithm* algo = get_algorithm(fparam, workspace.size);
+    if (!is_naive_algo(algo) &&
+        NCB_ALGO_FUNC(get_preprocess_workspace, algo, fparam) <=
+                workspace.size) {
+        exec_preprocess_with_ncb_kern(fparam, algo);
+    } else {
+        naive::ConvolutionForwardImpl::exec_preprocess(
+                src_layout, filter, dst_layout, preprocessed_filter, workspace);
+    }
+}
+
 size_t ConvolutionImpl::get_workspace_in_bytes(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& dst,
         const PreprocessedFilter* preprocessed_filter) {
-    auto fparam = make_ncb_kern_size_param(src, filter, dst);
+    auto fparam = make_ncb_kern_size_param(src, filter, dst,
+                                           preprocessed_filter);
     Algorithm* algo = get_algorithm(fparam);
     if (is_naive_algo(algo)) {
         return naive::ConvolutionForwardImpl::get_workspace_in_bytes(
                 src, filter, dst, preprocessed_filter);
     } else {
-        return ncb_algo_get_workspace(algo, fparam);
+        return static_cast<AlgoBase*>(algo)->get_workspace(this, fparam);
     }
 }

+size_t ConvolutionImpl::get_preprocess_workspace_in_bytes(
+        const TensorLayout& src, const TensorLayout& filter,
+        const TensorLayout& dst) {
+    auto fparam = make_ncb_kern_size_param(src, filter, dst, nullptr);
+    Algorithm* algo = get_algorithm(fparam);
+    if (is_naive_algo(algo)) {
+        return naive::ConvolutionForwardImpl::get_preprocess_workspace_in_bytes(
+                src, filter, dst);
+    } else {
+        return static_cast<AlgoBase*>(algo)->get_preprocess_workspace(this,
+                                                                      fparam);
+    }
+}
+
+SmallVector<TensorLayout> ConvolutionImpl::deduce_preprocessed_filter_layout(
+        const TensorLayout& src, const TensorLayout& filter,
+        const TensorLayout& dst) {
+    auto fparam = make_ncb_kern_size_param(src, filter, dst, nullptr);
+    Algorithm* algo = get_algorithm(fparam);
+    if (is_naive_algo(algo)) {
+        return naive::ConvolutionForwardImpl::deduce_preprocessed_filter_layout(
+                src, filter, dst);
+    } else {
+        return static_cast<AlgoBase*>(algo)->deduce_preprocessed_filter_layout(
+                this, fparam);
+    }
+}
+
 std::vector<ConvolutionImpl::Algorithm*> ConvolutionImpl::get_all_algorithms(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& dst) {
-    auto fparam = make_ncb_kern_size_param(src, filter, dst);
+    auto fparam = make_ncb_kern_size_param(src, filter, dst, nullptr);
     auto ret = get_all_algorithms_with_ncb(fparam);
     if (ret.empty()) {
         return naive::ConvolutionForwardImpl::get_all_algorithms(src, filter,
...
@@ -125,7 +179,7 @@ ConvolutionImpl::Algorithm* ConvolutionImpl::get_algorithm_heuristic(
         const TensorLayout& src, const TensorLayout& filter,
         const TensorLayout& dst, size_t workspace_limit_in_bytes,
         bool reproducible) {
-    auto fparam = make_ncb_kern_size_param(src, filter, dst);
+    auto fparam = make_ncb_kern_size_param(src, filter, dst, nullptr);
     auto result = get_algorithm_heuristic_with_ncb(
             fparam, workspace_limit_in_bytes, reproducible);
     if (result == nullptr) {
...
@@ -137,7 +191,8 @@ ConvolutionImpl::Algorithm* ConvolutionImpl::get_algorithm_heuristic(
 ConvolutionImpl::NCBKernSizeParam ConvolutionImpl::make_ncb_kern_size_param(
         const TensorLayout& src, const TensorLayout& filter,
-        const TensorLayout& dst) {
+        const TensorLayout& dst,
+        const PreprocessedFilter* preprocessed_filter) {
     auto safe_u32 = [](size_t v) -> uint32_t {
         megdnn_assert(v <= std::numeric_limits<uint32_t>::max(),
                       "value too large: %zu", v);
...
@@ -175,15 +230,17 @@ ConvolutionImpl::NCBKernSizeParam ConvolutionImpl::make_ncb_kern_size_param(
             {src.stride[0], src.stride[1], src.stride[2], src.stride[3]},
             {dst.stride[0], dst.stride[1], dst.stride[2], dst.stride[3]},
             param().compute_mode,
-            nr_threads};
+            nr_threads,
+            preprocessed_filter};
 }

 ConvolutionImpl::NCBKernParam ConvolutionImpl::make_ncb_kern_param(
         _megdnn_tensor_in src, _megdnn_tensor_in filter, _megdnn_tensor_out dst,
-        _megdnn_workspace workspace) {
+        const PreprocessedFilter* preprocessed_filter,
+        _megdnn_workspace workspace) {
     NCBKernParam ret;
-    static_cast<NCBKernSizeParam&>(ret) = make_ncb_kern_size_param(
-            src.layout, filter.layout, dst.layout);
+    static_cast<NCBKernSizeParam&>(ret) = make_ncb_kern_size_param(
+            src.layout, filter.layout, dst.layout, preprocessed_filter);
     ret.src_ptr = src.raw_ptr;
     ret.filter_ptr = filter.raw_ptr;
     ret.dst_ptr = dst.raw_ptr;
...
@@ -192,9 +249,30 @@ ConvolutionImpl::NCBKernParam ConvolutionImpl::make_ncb_kern_param(
     return ret;
 }

+void ConvolutionImpl::exec_preprocess_with_ncb_kern(const NCBKernParam& param,
+                                                    Algorithm* algo) {
+    auto kerns =
+            static_cast<AlgoBase*>(algo)->dispatch_preprocess_kern(this, param);
+    auto fallback_handle = handle();
+    for (auto kernel : kerns) {
+        megdnn_assert(param.filter_meta.format == Param::Format::NCHW ||
+                              param.filter_meta.format == Param::Format::NHWC ||
+                              param.filter_meta.format == Param::Format::NCHW88 ||
+                              param.filter_meta.format == Param::Format::NCHW44,
+                      "invalid conv format");
+        auto run = [param, kernel](size_t index, size_t thread_id) {
+            CpuNDRange ndrange_id(kernel.global_size, index);
+            kernel.kern(param, {thread_id, ndrange_id});
+        };
+        static_cast<naive::HandleImpl*>(fallback_handle)
+                ->dispatch_kern(run, kernel.global_size.total_size());
+    }
+}
+
 void ConvolutionImpl::exec_with_ncb_kern(const NCBKernParam& param,
                                          Algorithm* algo) {
-    auto kerns = ncb_algo_dispatch_kern(algo, param);
+    auto kerns = static_cast<AlgoBase*>(algo)->dispatch_kern(this, param);
     auto fallback_handle = handle();
     for (auto kernel : kerns) {
         megdnn_assert(param.filter_meta.format == Param::Format::NCHW ||
...
@@ -215,10 +293,13 @@ ConvolutionImpl::Algorithm* ConvolutionImpl::get_algorithm_heuristic_with_ncb(
         const NCBKernSizeParam& param, size_t workspace_limit_in_bytes,
         bool reproducible) {
     for (auto i : get_all_algorithms_with_ncb(param)) {
-        if (static_cast<AlgoBase*>(i)->usable_reproducible(
-                    this, param, AlgoSelectionStrategy::HEURISTIC,
-                    reproducible) &&
-            ncb_algo_get_workspace(i, param) <= workspace_limit_in_bytes) {
+        size_t need_workspace =
+                static_cast<AlgoBase*>(i)->get_workspace(this, param);
+        bool usable_reproducible =
+                static_cast<AlgoBase*>(i)->usable_reproducible(
+                        this, param, AlgoSelectionStrategy::HEURISTIC,
+                        reproducible);
+        if (usable_reproducible && need_workspace <= workspace_limit_in_bytes) {
             return i;
         }
     }
...
dnn/src/fallback/convolution/opr_impl.h  (view file @ b8b000db)
...
@@ -39,12 +39,26 @@ public:
               _megdnn_tensor_out dst, const PreprocessedFilter*,
               _megdnn_workspace workspace) override;

+    void exec_preprocess(const TensorLayout& src_layout,
+                         _megdnn_tensor_in filter,
+                         const TensorLayout& dst_layout,
+                         PreprocessedFilter* preprocessed_filter,
+                         _megdnn_workspace workspace) override;
+
     //! implemented by get_workspace_with_ncb()
     size_t get_workspace_in_bytes(const TensorLayout& src,
                                   const TensorLayout& filter,
                                   const TensorLayout& dst,
                                   const PreprocessedFilter*) override;

+    SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+            const TensorLayout& src, const TensorLayout& filter,
+            const TensorLayout& dst) override;
+
+    size_t get_preprocess_workspace_in_bytes(const TensorLayout& src,
+                                             const TensorLayout& filter,
+                                             const TensorLayout& dst) override;
+
     //! implemented by get_all_algorithms_with_ncb()
     std::vector<Algorithm*> get_all_algorithms(
             const TensorLayout& src, const TensorLayout& filter,
...
@@ -70,6 +84,8 @@ public:
         ptrdiff_t inp_s[4], out_s[4];
         Param::ComputeMode compute_mode;
         size_t nr_threads;
+        //! weight_preprocess info
+        const PreprocessedFilter* preprocessed_filter;
     };

     //! memory param for kernels with non-contiguous batch
...
@@ -169,6 +185,23 @@ public:
         virtual SmallVector<NCBKern> dispatch_kern(
                 ConvolutionImpl* opr, const NCBKernSizeParam& param) const = 0;

+        virtual SmallVector<NCBKern> dispatch_preprocess_kern(
+                ConvolutionImpl*, const NCBKernSizeParam&) const {
+            return {};
+        };
+
+        //! get the layouts of weight_prerocess dst
+        virtual SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
+                ConvolutionImpl*, const NCBKernSizeParam&) const {
+            return {};
+        };
+
+        //! get the workspace when weight_prerocess
+        virtual size_t get_preprocess_workspace(
+                ConvolutionImpl*, const NCBKernSizeParam&) const {
+            return 0_z;
+        };
+
         //! Temporarily used to identify whether the matmul algorithm is
         //! is_preferred.
         virtual bool is_preferred(ConvolutionImpl*,
...
@@ -192,6 +225,9 @@ public:
 protected:
     virtual void exec_with_ncb_kern(const NCBKernParam& param, Algorithm* algo);

+    virtual void exec_preprocess_with_ncb_kern(const NCBKernParam& param,
+                                               Algorithm* algo);
+
     virtual std::vector<Algorithm*> get_all_algorithms_with_ncb(
             const NCBKernSizeParam& param);
...
@@ -199,21 +235,6 @@ protected:
             const NCBKernSizeParam& param, size_t workspace_limit_in_bytes,
             bool reproducible = false);

-    //! get kernel pointer
-    virtual SmallVector<NCBKern> ncb_algo_dispatch_kern(
-            Algorithm* algo, const NCBKernSizeParam& param) {
-        return static_cast<AlgoBase*>(algo)->dispatch_kern(this, param);
-    }
-
-    //! get algo workspace
-    virtual size_t ncb_algo_get_workspace(Algorithm* algo,
-                                          const NCBKernSizeParam& param) {
-        return static_cast<AlgoBase*>(algo)->get_workspace(this, param);
-    }
-
-    /*!
-     * the default impl iterates over all ncb_1g_get_all_algorithms()
-     * and return the first one whose workspace does not exceed the limit.
-     */
     const char* get_algorithm_set_name() const override;

     class AlgoFallback;
...
@@ -231,14 +252,16 @@ private:
             const NCBKernSizeParam& param,
             size_t workspace_size = std::numeric_limits<size_t>::max());

-    NCBKernSizeParam make_ncb_kern_size_param(const TensorLayout& src,
-                                              const TensorLayout& filter,
-                                              const TensorLayout& dst);
-
-    NCBKernParam make_ncb_kern_param(_megdnn_tensor_in src,
-                                     _megdnn_tensor_in filter,
-                                     _megdnn_tensor_out dst,
-                                     _megdnn_workspace workspace);
+    NCBKernSizeParam make_ncb_kern_size_param(
+            const TensorLayout& src, const TensorLayout& filter,
+            const TensorLayout& dst,
+            const PreprocessedFilter* preprocessed_filter);
+
+    NCBKernParam make_ncb_kern_param(
+            _megdnn_tensor_in src, _megdnn_tensor_in filter,
+            _megdnn_tensor_out dst,
+            const PreprocessedFilter* preprocessed_filter,
+            _megdnn_workspace workspace);
 };

 class ConvolutionBackwardDataImpl : public naive::ConvolutionBackwardDataImpl {
...
dnn/src/naive/conv_bias/opr_impl.cpp  (view file @ b8b000db)
...
@@ -80,14 +80,15 @@ size_t ConvBiasForwardImpl::get_workspace_in_bytes(const TensorLayout& src,
 void ConvBiasForwardImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
                                _megdnn_tensor_in bias, _megdnn_tensor_in z,
                                _megdnn_tensor_out dst,
-                               const PreprocessedFilter*,
+                               const PreprocessedFilter* preprocessed_filter,
                                _megdnn_workspace workspace) {
     MIDOUT_BEGIN(megdnn_naive_conv_bias_fwd) {
         dt_byte* workspace_ptr = workspace.raw_ptr;
         // ============================w * f + b================================
-        auto filter_meta = check_exec(src.layout, filter.layout, bias.layout,
-                                      z.layout, dst.layout, workspace.size);
+        auto filter_meta =
+                check_exec(src.layout, filter.layout, bias.layout, z.layout,
+                           dst.layout, workspace.size, preprocessed_filter);
         auto sfb = dst;
         if (bias.layout.dtype.enumv() != dst.layout.dtype.enumv()) {
             // intermediate result
...
dnn/src/naive/conv_bias/opr_impl.h  (view file @ b8b000db)
...
@@ -61,9 +61,7 @@ public:
     void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
                          const TensorLayout&, const TensorLayout&,
                          const TensorLayout&, PreprocessedFilter*,
-                         _megdnn_workspace) override {
-        megdnn_throw("conv_bias exec_preprocess is not impl yet");
-    }
+                         _megdnn_workspace) override {}

     const char* get_algorithm_set_name() const override;
 };
...
dnn/src/naive/convolution/convolution.cpp  (view file @ b8b000db)
...
@@ -28,11 +28,11 @@ using namespace naive;
 void ConvolutionForwardImpl::exec(_megdnn_tensor_in src,
                                   _megdnn_tensor_in filter,
                                   _megdnn_tensor_out dst,
-                                  const PreprocessedFilter*,
+                                  const PreprocessedFilter* preprocessed_filter,
                                   _megdnn_workspace workspace) {
     MIDOUT_BEGIN(megdnn_naive_conv_fwd) {
         auto filter_meta = check_exec(src.layout, filter.layout, dst.layout,
-                                      workspace.size);
+                                      workspace.size, preprocessed_filter);
         using ComputeMode = Param::ComputeMode;
 #define DISPATCH_CMODE(in_dt, out_dt, in_ct, out_ct, comp_ct, cmode) \
     do {                                                             \
...
dnn/src/naive/convolution/opr_impl.h  (view file @ b8b000db)
...
@@ -44,9 +44,7 @@ class ConvolutionForwardImpl: public ConvolutionForward {
         void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
                              const TensorLayout&, PreprocessedFilter*,
-                             _megdnn_workspace) override {
-            megdnn_throw("convolution exec_preprocess in not impl yet");
-        }
+                             _megdnn_workspace) override {}

         SmallVector<TensorLayout> deduce_preprocessed_filter_layout(
                 const TensorLayout&, const TensorLayout&,
...
dnn/test/common/opr_proxy.h  (view file @ b8b000db)
...
@@ -18,6 +18,9 @@
 #include "test/common/workspace_wrapper.h"

+#include <algorithm>
+#include <memory>
+
 namespace megdnn {
 namespace test {
...
@@ -32,6 +35,9 @@ struct OprProxyDefaultImpl
 template <typename Opr>
 struct OprProxy : public OprProxyDefaultImpl<Opr> {};

+template <typename Opr>
+struct OprWeightPreprocessProxy : public OprProxyDefaultImpl<Opr> {};
+
 template <typename Opr>
 struct OprProxyVectorToSingle {};
...
@@ -139,6 +145,28 @@ struct OprProxyProfilingBase
     typename Opr::Algorithm* target_algo = nullptr;

     OprProxyProfilingBase(bool profile = false) { m_profiling = profile; }

+    //! used for alloc tensor for weight preprocess
+    static std::shared_ptr<TensorNDArray> alloc_tensors(
+            Handle* handle, const TensorLayoutArray& layouts) {
+        auto deleter = [handle](TensorNDArray* ptr) {
+            for (auto&& i : *ptr) {
+                auto pdata = static_cast<dt_byte*>(i.raw_ptr) +
+                             i.layout.span().low_byte;
+                megdnn_free(handle, pdata);
+            }
+            delete ptr;
+        };
+        std::shared_ptr<TensorNDArray> ret{new TensorNDArray, deleter};
+        for (size_t i = 0; i < layouts.size(); ++i) {
+            auto span = layouts[i].span();
+            ret->emplace_back(static_cast<dt_byte*>(megdnn_malloc(
+                                      handle, span.dist_byte())) -
+                                      span.low_byte,
+                              layouts[i]);
+        }
+        return ret;
+    }
 };

 template <class Opr>
...
@@ -207,7 +235,6 @@ DEF_PROF3(LocalShareBackwardData);
 DEF_PROF3(LocalShareBackwardFilter);
 #undef DEF_PROF3

-//! TODO: it should adapt weight preprocess later
 template <>
 struct OprProxy<ConvolutionForward>
         : public OprProxyProfilingTernary<ConvolutionForward> {
...
@@ -263,6 +290,100 @@ struct OprProxy<ConvolutionForward>
     }
 };

+template <>
+struct OprWeightPreprocessProxy<ConvolutionForward>
+        : public OprProxyProfilingTernary<ConvolutionForward> {
+    using OprProxyProfilingTernary<ConvolutionForward>::OprProxyProfilingTernary;
+    void exec(ConvolutionForward* opr, const TensorNDArray& tensors) {
+        megdnn_assert(tensors.size() == 3);
+        if (!Base::W.valid()) {
+            Base::W = WorkspaceWrapper(opr->handle(), 0);
+        }
+        if (Base::m_profiling && !Base::target_algo) {
+            size_t min_time = std::numeric_limits<size_t>::max();
+            for (auto algo : opr->get_all_algorithms(tensors[0].layout,
+                                                     tensors[1].layout,
+                                                     tensors[2].layout)) {
+                opr->execution_policy().algorithm = algo;
+
+                auto preprocess_tensors = weight_prerocess(opr, tensors, algo);
+                megcoreSynchronize(opr->handle()->megcore_computing_handle());
+                ConvolutionForward::PreprocessedFilter preprocessed_filter{
+                        algo, *preprocess_tensors};
+
+                auto workspace_size = opr->get_workspace_in_bytes(
+                        tensors[0].layout, tensors[1].layout, tensors[2].layout,
+                        &preprocessed_filter);
+                Base::W.update(workspace_size);
+
+                for (size_t times = 0; times < Base::warmup_times; ++times)
+                    opr->exec(tensors[0], tensors[1], tensors[2],
+                              &preprocessed_filter, Base::W.workspace());
+                megcoreSynchronize(opr->handle()->megcore_computing_handle());
+                Timer timer;
+                timer.start();
+                for (size_t times = 0; times < Base::exec_times; ++times) {
+                    opr->exec(tensors[0], tensors[1], tensors[2],
+                              &preprocessed_filter, Base::W.workspace());
+                }
+                megcoreSynchronize(opr->handle()->megcore_computing_handle());
+                timer.stop();
+                printf("%.3fms %s\n", timer.get_time_in_us() / 1e3,
+                       algo->name());
+                if (min_time > timer.get_time_in_us()) {
+                    min_time = timer.get_time_in_us();
+                    Base::target_algo = algo;
+                }
+            }
+            opr->execution_policy().algorithm = Base::target_algo;
+            auto preprocess_tensors =
+                    weight_prerocess(opr, tensors, Base::target_algo);
+            megcoreSynchronize(opr->handle()->megcore_computing_handle());
+            ConvolutionForward::PreprocessedFilter preprocessed_filter{
+                    Base::target_algo, *preprocess_tensors};
+            auto workspace_size = opr->get_workspace_in_bytes(
+                    tensors[0].layout, tensors[1].layout, tensors[2].layout,
+                    &preprocessed_filter);
+            Base::W.update(workspace_size);
+        }
+        auto preprocess_tensors =
+                weight_prerocess(opr, tensors, Base::target_algo);
+        megcoreSynchronize(opr->handle()->megcore_computing_handle());
+        ConvolutionForward::PreprocessedFilter preprocessed_filter{
+                Base::target_algo, *preprocess_tensors};
+        if (!Base::target_algo) {
+            auto workspace_size = opr->get_workspace_in_bytes(
+                    tensors[0].layout, tensors[1].layout, tensors[2].layout,
+                    &preprocessed_filter);
+            Base::W.update(workspace_size);
+        }
+        opr->exec(tensors[0], tensors[1], tensors[2], &preprocessed_filter,
+                  Base::W.workspace());
+    }
+
+    //! handle weight preprocess
+    std::shared_ptr<TensorNDArray> weight_prerocess(
+            ConvolutionForward* opr, const TensorNDArray& tensors,
+            ConvolutionForward::Algorithm* algo) {
+        auto weight_perprocess_layouts = opr->deduce_preprocessed_filter_layout(
+                tensors[0].layout, tensors[1].layout, tensors[2].layout);
+        auto preprocessed_filter_tensors_ptr =
+                alloc_tensors(opr->handle(), weight_perprocess_layouts);
+        ConvolutionForward::PreprocessedFilter preprocessed_filter{
+                algo, *preprocessed_filter_tensors_ptr};
+        size_t preprocess_workspace_size =
+                opr->get_preprocess_workspace_in_bytes(tensors[0].layout,
+                                                       tensors[1].layout,
+                                                       tensors[2].layout);
+        WorkspaceWrapper preprocess_workspace(opr->handle(),
+                                              preprocess_workspace_size);
+        opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2].layout,
+                             &preprocessed_filter,
+                             preprocess_workspace.workspace());
+        return preprocessed_filter_tensors_ptr;
+    }
+};
+
 template <class Opr>
 struct OprProxyProfiling5 : public OprProxyProfilingBase<Opr, 5> {
...
@@ -329,11 +450,9 @@ struct OprProxyProfiling5 : public OprProxyProfilingBase<Opr, 5> {
 DEF_PROF5(DeformableConvForward);
 DEF_PROF5(DeformableConvBackwardFilter);
-//DEF_PROF5(ConvBiasForward);
 DEF_PROF5(BatchConvBiasForward);
 #undef DEF_PROF5

-//! TODO: it should adapt weight preprocess later
 template <>
 struct OprProxy<ConvBiasForward> : public OprProxyProfiling5<ConvBiasForward> {
     using OprProxyProfiling5<ConvBiasForward>::OprProxyProfiling5;
...
@@ -390,6 +509,106 @@ struct OprProxy<ConvBiasForward> : public OprProxyProfiling5<ConvBiasForward> {
     }
 };

+template <>
+struct OprWeightPreprocessProxy<ConvBiasForward>
+        : public OprProxyProfiling5<ConvBiasForward> {
+    using OprProxyProfiling5<ConvBiasForward>::OprProxyProfiling5;
+    void exec(ConvBiasForward* opr, const TensorNDArray& tensors) {
+        megdnn_assert(tensors.size() == 5);
+        if (!Base::W.valid()) {
+            Base::W = WorkspaceWrapper(opr->handle(), 0);
+        }
+        if (Base::m_profiling && !Base::target_algo) {
+            size_t min_time = std::numeric_limits<size_t>::max();
+            for (auto algo : opr->get_all_algorithms(
+                         tensors[0].layout, tensors[1].layout,
+                         tensors[2].layout, tensors[3].layout,
+                         tensors[4].layout)) {
+                opr->execution_policy().algorithm = algo;
+
+                auto preprocess_tensors = weight_prerocess(opr, tensors, algo);
+                megcoreSynchronize(opr->handle()->megcore_computing_handle());
+                ConvBiasForward::PreprocessedFilter preprocessed_filter{
+                        algo, *preprocess_tensors};
+
+                auto workspace_size = opr->get_workspace_in_bytes(
+                        tensors[0].layout, tensors[1].layout, tensors[2].layout,
+                        tensors[3].layout, tensors[4].layout,
+                        &preprocessed_filter);
+                Base::W.update(workspace_size);
+
+                for (size_t times = 0; times < Base::warmup_times; ++times)
+                    opr->exec(tensors[0], tensors[1], tensors[2], tensors[3],
+                              tensors[4], &preprocessed_filter,
+                              Base::W.workspace());
+                megcoreSynchronize(opr->handle()->megcore_computing_handle());
+                Timer timer;
+                timer.start();
+                for (size_t times = 0; times < Base::exec_times; ++times) {
+                    opr->exec(tensors[0], tensors[1], tensors[2], tensors[3],
+                              tensors[4], &preprocessed_filter,
+                              Base::W.workspace());
+                }
+                megcoreSynchronize(opr->handle()->megcore_computing_handle());
+                timer.stop();
+                printf("%.3fms %s\n", timer.get_time_in_us() / 1e3,
+                       algo->name());
+                if (min_time > timer.get_time_in_us()) {
+                    min_time = timer.get_time_in_us();
+                    Base::target_algo = algo;
+                }
+            }
+            opr->execution_policy().algorithm = Base::target_algo;
+            auto preprocess_tensors =
+                    weight_prerocess(opr, tensors, Base::target_algo);
+            megcoreSynchronize(opr->handle()->megcore_computing_handle());
+            ConvBiasForward::PreprocessedFilter preprocessed_filter{
+                    Base::target_algo, *preprocess_tensors};
+            auto workspace_size = opr->get_workspace_in_bytes(
+                    tensors[0].layout, tensors[1].layout, tensors[2].layout,
+                    tensors[3].layout, tensors[4].layout, &preprocessed_filter);
+            Base::W.update(workspace_size);
+        }
+        auto preprocess_tensors =
+                weight_prerocess(opr, tensors, Base::target_algo);
+        megcoreSynchronize(opr->handle()->megcore_computing_handle());
+        ConvBiasForward::PreprocessedFilter preprocessed_filter{
+                Base::target_algo, *preprocess_tensors};
+        if (!Base::target_algo) {
+            auto workspace_size = opr->get_workspace_in_bytes(
+                    tensors[0].layout, tensors[1].layout, tensors[2].layout,
+                    tensors[3].layout, tensors[4].layout, &preprocessed_filter);
+            Base::W.update(workspace_size);
+        }
+        opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4],
+                  &preprocessed_filter, Base::W.workspace());
+    }
+
+    //! handle weight preprocess
+    std::shared_ptr<TensorNDArray> weight_prerocess(
+            ConvBiasForward* opr, const TensorNDArray& tensors,
+            ConvBiasForward::Algorithm* algo) {
+        auto weight_perprocess_layouts = opr->deduce_preprocessed_filter_layout(
+                tensors[0].layout, tensors[1].layout, tensors[2].layout,
+                tensors[3].layout, tensors[4].layout);
+        auto preprocessed_filter_tensors_ptr =
+                alloc_tensors(opr->handle(), weight_perprocess_layouts);
+        ConvBiasForward::PreprocessedFilter preprocessed_filter{
+                algo, *preprocessed_filter_tensors_ptr};
+        size_t preprocess_workspace_size =
+                opr->get_preprocess_workspace_in_bytes(
+                        tensors[0].layout, tensors[1].layout, tensors[2].layout,
+                        tensors[3].layout, tensors[4].layout);
+        WorkspaceWrapper preprocess_workspace(opr->handle(),
+                                              preprocess_workspace_size);
+        opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2].layout,
+                             tensors[3].layout, tensors[4].layout,
+                             &preprocessed_filter,
+                             preprocess_workspace.workspace());
+        return preprocessed_filter_tensors_ptr;
+    }
+};
+
 template <class Opr>
 struct OprProxyProfiling8 : public OprProxyProfilingBase<Opr, 8> {
     using Base = OprProxyProfilingBase<Opr, 8>;
...
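For completeness, one possible way a test could drive the new OprWeightPreprocessProxy is sketched below. This assumes the megdnn test Checker template accepts a proxy type as its second template parameter and a handle()/set_param()/execs() interface as used elsewhere in the test suite; that wiring is not part of this commit, so treat the snippet as a hypothetical usage example rather than code from the repository:

    // Hypothetical test snippet, not part of this commit: run ConvolutionForward
    // through the weight-preprocess proxy so exec() receives a PreprocessedFilter.
    Checker<ConvolutionForward, OprWeightPreprocessProxy<ConvolutionForward>>
            checker(handle());
    ConvolutionForward::Param param;  // default NCHW convolution
    checker.set_param(param).execs({{2, 3, 16, 16}, {8, 3, 3, 3}, {}});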