Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
5bdc430e
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
403
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
5bdc430e
编写于
5月 24, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix(mgb/fastrun): fix megbrain fastrun memory overflow bug
GitOrigin-RevId: f56aa5a5059ef6c5ea9d51fe31d89a0d778aa702
上级
d7ddd43f
变更
8
隐藏空白更改
内联
并排
Showing
8 changed file
with
111 addition
and
78 deletion
+111
-78
imperative/src/impl/algo_chooser.h
imperative/src/impl/algo_chooser.h
+3
-2
imperative/src/impl/ops/convolution.cpp
imperative/src/impl/ops/convolution.cpp
+26
-18
imperative/src/impl/ops/matmul.cpp
imperative/src/impl/ops/matmul.cpp
+13
-13
imperative/src/impl/ops/pooling.cpp
imperative/src/impl/ops/pooling.cpp
+10
-8
src/rdnn/impl/algo_chooser.cpp
src/rdnn/impl/algo_chooser.cpp
+13
-6
src/rdnn/impl/profiler.cpp
src/rdnn/impl/profiler.cpp
+38
-29
src/rdnn/include/megbrain/rdnn/algo_chooser.h
src/rdnn/include/megbrain/rdnn/algo_chooser.h
+5
-1
src/rdnn/include/megbrain/rdnn/profiler.h
src/rdnn/include/megbrain/rdnn/profiler.h
+3
-1
未找到文件。
imperative/src/impl/algo_chooser.h
浏览文件 @
5bdc430e
...
...
@@ -11,7 +11,8 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo(
const
typename
mgb
::
rdnn
::
AlgoChooser
<
Opr
>::
FixedTensorLayouts
&
layouts
,
Opr
*
megdnn_opr
,
uint32_t
shared_batch_size
,
bool
binary_equal_between_batch
,
bool
no_profiling_on_shape_change
,
CompNode
comp_node
,
megdnn
::
param
::
ExecutionPolicy
execution_policy
,
bool
allow_weight_preprocess
)
{
megdnn
::
param
::
ExecutionPolicy
execution_policy
,
bool
allow_weight_preprocess
,
SmallVector
<
megdnn
::
TensorND
>*
inp_tensornds
=
nullptr
)
{
megdnn
::
AlgorithmCache
::
Key
cache_key
(
megdnn_opr
->
handle
(),
megdnn_opr
->
get_opr_type
(),
layouts
.
data
(),
layouts
.
size
(),
&
megdnn_opr
->
param
(),
sizeof
(
megdnn_opr
->
param
()));
...
...
@@ -39,7 +40,7 @@ MGE_WIN_DECLSPEC_FUC size_t setup_algo(
using
AlgoChooserHelper
=
typename
mgb
::
rdnn
::
AlgoChooser
<
Opr
>::
AlgoChooserHelper
;
AlgoChooserHelper
helper
(
layouts
,
megdnn_opr
,
param_str
,
comp_node
,
execution_policy
,
allow_weight_preprocess
,
desc
);
allow_weight_preprocess
,
desc
,
inp_tensornds
);
megdnn
::
ExecutionPolicy
policy
;
policy
=
mgb
::
rdnn
::
AlgoChooser
<
Opr
>::
get_policy
(
helper
);
...
...
imperative/src/impl/ops/convolution.cpp
浏览文件 @
5bdc430e
...
...
@@ -141,11 +141,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
def
,
inputs
[
0
]
->
layout
().
ndim
,
inputs
[
0
]
->
layout
(),
inputs
[
1
]
->
layout
());
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
out_layout
);
using
TensorND
=
megdnn
::
TensorND
;
SmallVector
<
TensorND
>
inp_tensornds
(
inputs
.
size
());
SmallVector
<
TensorND
>
inp_tensornds
(
inputs
.
size
()
+
2
);
TensorLayoutArray
inp_shapes
(
inputs
.
size
()),
oup_shapes
(
output_descs
.
size
());
for
(
unsigned
i
=
0
;
i
<
inputs
.
size
();
++
i
)
{
inp_tensornds
[
i
]
=
inputs
[
i
]
->
dnn_tensor
();
...
...
@@ -168,13 +165,20 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
TensorLayout
empty_shp
({
0
},
inputs
[
0
]
->
dtype
());
empty_shp
.
ndim
=
0
;
DeviceTensorND
empty_bias
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
empty_shp
);
inp_tensornds
[
2
]
=
empty_bias
.
as_megdnn
();
inp_tensornds
[
3
]
=
empty_bias
.
as_megdnn
();
size_t
sz
=
setup_algo
<
megdnn
::
ConvBiasForward
>
(
{
inp_shapes
[
0
],
inp_shapes
[
1
],
empty_shp
,
empty_shp
,
oup_shapes
[
0
]},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
conv
.
policy
(),
false
);
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
conv
.
policy
(),
false
,
&
inp_tensornds
);
// alloc memory
DeviceTensorND
empty_bias
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
empty_shp
);
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
out_layout
);
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
...
...
@@ -364,9 +368,6 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
def
,
inputs
[
1
]
->
layout
().
ndim
,
inputs
[
0
]
->
layout
(),
inputs
[
1
]
->
layout
(),
cn
);
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
out_layout
);
using
TensorND
=
megdnn
::
TensorND
;
SmallVector
<
TensorND
>
inp_tensornds
(
inputs
.
size
());
TensorLayoutArray
inp_shapes
(
inputs
.
size
()),
oup_shapes
(
output_descs
.
size
());
...
...
@@ -380,7 +381,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
size_t
sz
=
setup_algo
<
megdnn
::
ConvolutionBackwardData
>
(
{
inp_shapes
[
0
],
inp_shapes
[
1
],
oup_shapes
[
0
]},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
convbwd
.
policy
(),
false
);
false
,
cn
,
convbwd
.
policy
(),
false
,
&
inp_tensornds
);
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
out_layout
);
auto
wk
=
Blob
::
make
(
cn
,
sz
);
auto
ptr
=
wk
->
storage
().
get
();
...
...
@@ -542,7 +546,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
// shape infer
size_t
sz
=
setup_algo
<
megdnn
::
Convolution3D
>
(
{
inp_shapes
[
0
],
inp_shapes
[
1
],
oup_shapes
[
0
]},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
conv
.
policy
(),
false
);
false
,
cn
,
conv
.
policy
(),
false
,
&
inp_tensornds
);
// alloc memory
DeviceTensorND
out
=
...
...
@@ -598,8 +602,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
SmallVector
<
LogicalTensorDesc
>&
output_descs
,
const
bool
&
validated
)
{
auto
&&
op_def
=
def
.
cast_final_safe
<
Convolution3DBackwardData
>
();
auto
cn
=
inputs
[
0
]
->
comp_node
();
megdnn
::
TensorND
weight
=
inputs
[
0
]
->
dnn_tensor
();
megdnn
::
TensorND
diff
=
inputs
[
1
]
->
dnn_tensor
();
auto
&&
wlayout
=
inputs
[
0
]
->
layout
();
auto
&&
dlayout
=
inputs
[
1
]
->
layout
();
DnnOprCaller
<
megdnn
::
Convolution3DBackwardData
>
caller
(
cn
);
auto
&&
dnn_opr
=
caller
.
op
;
...
...
@@ -608,21 +613,24 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
TensorLayout
&
oup_layout
=
output_descs
[
0
].
layout
;
if
(
!
validated
)
{
megdnn
::
Convolution3DBackwardData
::
deduce_layout_impl
(
w
eight
.
layout
,
diff
.
layout
,
op_def
.
param
(),
oup_layout
);
w
layout
,
d
layout
,
op_def
.
param
(),
oup_layout
);
}
DeviceTensorND
oup
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
oup_layout
);
SmallVector
<
megdnn
::
TensorND
>
inp_tensornds
(
inputs
.
size
());
inp_tensornds
[
0
]
=
inputs
[
0
]
->
dnn_tensor
();
inp_tensornds
[
1
]
=
inputs
[
1
]
->
dnn_tensor
();
size_t
wk_size
=
setup_algo
<
megdnn
::
Convolution3DBackwardData
>
(
{
w
eight
.
layout
,
diff
.
layout
,
oup_layout
},
dnn_opr
.
get
(),
0
,
false
,
false
,
cn
,
op_def
.
policy
(),
false
);
{
w
layout
,
dlayout
,
oup_layout
},
dnn_opr
.
get
(),
0
,
false
,
false
,
cn
,
op_def
.
policy
(),
false
,
&
inp_tensornds
);
megdnn
::
Workspace
dnn_wk
;
if
(
wk_size
!=
0
)
{
TensorLayout
w_layout
({
wk_size
},
dtype
::
Byte
());
dnn_wk
=
caller
.
create_workspace
(
w_layout
);
}
dnn_opr
->
exec
(
weight
,
diff
,
oup
.
as_megdnn
(),
dnn_wk
);
dnn_opr
->
exec
(
inp_tensornds
[
0
],
inp_tensornds
[
1
]
,
oup
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
oup
)};
}
...
...
imperative/src/impl/ops/matmul.cpp
浏览文件 @
5bdc430e
...
...
@@ -229,12 +229,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
inp_tensornds
[
0
].
layout
=
layout_a
;
inp_tensornds
[
1
].
layout
=
layout_b
;
}
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
dst_layout
);
size_t
sz
=
setup_algo
<
megdnn
::
MatrixMul
>
(
{
layout_a
,
layout_b
,
dst_layout
},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
matmul
.
policy
(),
false
);
matmul
.
policy
(),
false
,
&
inp_tensornds
);
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
dst_layout
);
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
...
...
@@ -470,21 +469,22 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
return
{
Tensor
::
make
(
out
)};
}
using
TensorND
=
megdnn
::
TensorND
;
TensorND
inp_nd1
=
inp1
->
dnn_tensor
();
inp_nd1
.
layout
=
layout1
;
TensorND
inp_nd2
=
inp2
->
dnn_tensor
();
inp_nd2
.
layout
=
layout2
;
SmallVector
<
megdnn
::
TensorND
>
inp_tensornds
(
2u
);
inp_tensornds
[
0
]
=
inp1
->
dnn_tensor
();
inp_tensornds
[
0
].
layout
=
layout1
;
inp_tensornds
[
1
]
=
inp2
->
dnn_tensor
();
inp_tensornds
[
1
].
layout
=
layout2
;
size_t
sz
=
setup_algo
<
megdnn
::
BatchedMatrixMul
>
(
{
layout1
,
layout2
,
dst_layout
},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
matmul
.
policy
(),
false
,
&
inp_tensornds
);
DeviceTensorND
out
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
dst_layout
);
size_t
sz
=
setup_algo
<
megdnn
::
BatchedMatrixMul
>
(
{
layout1
,
layout2
,
dst_layout
},
dnn_opr
.
op
.
get
(),
0
,
false
,
false
,
cn
,
matmul
.
policy
(),
false
);
TensorLayout
w_layout
({
sz
},
dtype
::
Byte
());
auto
dnn_wk
=
dnn_opr
.
create_workspace
(
w_layout
);
dnn_opr
.
op
->
exec
(
inp_
nd1
,
inp_nd2
,
out
.
as_megdnn
(),
dnn_wk
);
dnn_opr
.
op
->
exec
(
inp_
tensornds
[
0
],
inp_tensornds
[
1
]
,
out
.
as_megdnn
(),
dnn_wk
);
shp1
[
shp1
.
ndim
-
2
]
=
dst_layout
[
dst_layout
.
ndim
-
2
];
shp1
[
shp1
.
ndim
-
1
]
=
dst_layout
[
dst_layout
.
ndim
-
1
];
...
...
imperative/src/impl/ops/pooling.cpp
浏览文件 @
5bdc430e
...
...
@@ -49,23 +49,25 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
auto
&&
op_def
=
def
.
cast_final_safe
<
Pooling
>
();
auto
cn
=
inputs
[
0
]
->
comp_node
();
megdnn
::
TensorND
inp_tensornd
=
inputs
[
0
]
->
dnn_tensor
();
DnnOprCaller
<
megdnn
::
Pooling
>
caller
(
cn
);
auto
&&
dnn_opr
=
caller
.
op
;
dnn_opr
->
param
()
=
op_def
.
param
();
SmallVector
<
megdnn
::
TensorND
>
inp_tensornds
(
inputs
.
size
());
inp_tensornds
[
0
]
=
inputs
[
0
]
->
dnn_tensor
();
TensorLayout
&
oup_layout
=
output_descs
[
0
].
layout
;
if
(
!
validated
)
{
megdnn
::
Pooling
::
deduce_layout_impl
(
inp_tensornd
.
layout
,
op_def
.
param
(),
oup_layout
);
inp_tensornd
s
[
0
]
.
layout
,
op_def
.
param
(),
oup_layout
);
}
DeviceTensorND
out_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
oup_layout
);
size_t
wk_size
=
setup_algo
<
megdnn
::
Pooling
>
(
{
inp_tensornd
.
layout
,
oup_layout
},
dnn_opr
.
get
(),
0
,
false
,
false
,
cn
,
op_def
.
policy
(),
false
);
{
inp_tensornds
[
0
].
layout
,
oup_layout
},
dnn_opr
.
get
(),
0
,
false
,
false
,
cn
,
op_def
.
policy
(),
false
,
&
inp_tensornds
);
DeviceTensorND
out_devtensor
=
BlobManager
::
inst
()
->
alloc_workspace_with_defrag
(
cn
,
oup_layout
);
megdnn
::
Workspace
dnn_wk
;
if
(
wk_size
)
{
...
...
@@ -73,7 +75,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
dnn_wk
=
caller
.
create_workspace
(
w_layout
);
}
dnn_opr
->
exec
(
inp_tensornd
,
out_devtensor
.
as_megdnn
(),
dnn_wk
);
dnn_opr
->
exec
(
inp_tensornd
s
[
0
]
,
out_devtensor
.
as_megdnn
(),
dnn_wk
);
return
{
Tensor
::
make
(
out_devtensor
)};
}
...
...
src/rdnn/impl/algo_chooser.cpp
浏览文件 @
5bdc430e
...
...
@@ -265,7 +265,8 @@ std::vector<megdnn::Algorithm::SearchItem> flatten_search_space(
typename
rdnn
::
AlgoChooser
<
_Opr
>::
AlgoChooserHelper
sub_helper
(
to_fixed_layouts
<
_Opr
>
(
_item
.
layouts
),
megdnn_opr
.
get
(),
_item
.
param
,
helper
.
comp_node
(),
helper
.
execution_policy
(),
helper
.
allow_weight_preprocess
(),
helper
.
desc
());
helper
.
allow_weight_preprocess
(),
helper
.
desc
(),
helper
.
get_input
());
auto
space
=
flatten_search_space
<
_Opr
>
(
sub_helper
,
checker
);
ret
.
insert
(
ret
.
end
(),
space
.
begin
(),
space
.
end
());
});
...
...
@@ -488,7 +489,8 @@ AlgoChooser<Opr>::AlgoChooserHelper::AlgoChooserHelper(
const
FixedTensorLayouts
&
layouts
,
Opr
*
megdnn_opr
,
const
std
::
string
&
param_str
,
const
CompNode
&
cn
,
const
megdnn
::
param
::
ExecutionPolicy
&
execution_policy
,
bool
allow_weight_preprocess
,
const
AlgoChooserDesc
&
desc
)
bool
allow_weight_preprocess
,
const
AlgoChooserDesc
&
desc
,
SmallVector
<
megdnn
::
TensorND
>*
inputs
)
:
m_fastrun_layouts
{
layouts
},
m_incache_layouts
{
layouts
},
m_dnn_opr
{
megdnn_opr
},
...
...
@@ -496,7 +498,8 @@ AlgoChooser<Opr>::AlgoChooserHelper::AlgoChooserHelper(
m_cn
{
cn
},
m_execution_policy
{
execution_policy
},
m_allow_weight_preprocess
{
allow_weight_preprocess
},
m_desc
{
desc
}
{
m_desc
{
desc
},
m_inputs
{
inputs
}
{
auto
fastrun_batch_size
=
desc
.
shared_batch_size
;
if
(
fastrun_batch_size
)
{
...
...
@@ -604,7 +607,7 @@ typename AlgoChooser<Opr>::ImplExecutionPolicy AlgoChooser<Opr>::AlgoChooserHelp
typename
AlgoChooser
<
_Opr
>::
AlgoChooserHelper
sub_helper
(
to_fixed_layouts
<
_Opr
>
(
_item
.
layouts
),
megdnn_opr
.
get
(),
_item
.
param
,
m_cn
,
m_execution_policy
,
m_allow_weight_preprocess
,
m_desc
);
m_desc
,
m_inputs
);
sub_helper
.
profile
(
selected_strategy
);
});
}
...
...
@@ -868,6 +871,7 @@ Maybe<AlgoChooserProfileCache::ResultEntry> AlgoChooser<Opr>::AlgoChooserHelper:
param
.
shapes
[
i
]
=
m_fastrun_layouts
[
i
];
param
.
opr_param
=
m_dnn_opr
->
param
();
param
.
allow_weight_preprocess
=
m_allow_weight_preprocess
;
param
.
inp_tensornds
=
m_inputs
;
Algorithm
*
palgo
=
m_dnn_opr
->
get_algorithm_from_desc
(
policy
.
algo
);
mgb_assert
(
palgo
,
"can not find algo when profile single algo"
);
...
...
@@ -964,7 +968,9 @@ void AlgoChooser<Opr>::AlgoChooserHelper::profile(
if
(
!
policy
.
algo
.
valid
())
continue
;
size_t
workspace_needed
=
get_workspace_size_bytes
(
policy
);
if
(
data_size
+
workspace_needed
>
if
(
m_inputs
!=
nullptr
)
workspace_needed
+=
data_size
;
if
(
workspace_needed
>
m_desc
.
get_workspace_limit
(
m_cn
,
m_execution_policy
.
workspace_limit
))
{
continue
;
}
...
...
@@ -1101,7 +1107,8 @@ std::pair<AlgoAttribute, AlgoAttribute> AlgoChooser<Opr>::AlgoChooserHelper::
const FixedTensorLayouts& layouts, megdnn::Opr* megdnn_opr, \
const std::string& param_str, const CompNode& cn, \
const megdnn::param::ExecutionPolicy& execution_policy, \
bool allow_weight_preprocess, const AlgoChooserDesc& desc); \
bool allow_weight_preprocess, const AlgoChooserDesc& desc, \
SmallVector<megdnn::TensorND>* inputs); \
template typename AlgoChooser<megdnn::Opr>::ImplExecutionPolicy \
AlgoChooser<megdnn::Opr>::AlgoChooserHelper::choose_by_heuristic( \
const ExecutionStrategy& select_strategy) const; \
...
...
src/rdnn/impl/profiler.cpp
浏览文件 @
5bdc430e
...
...
@@ -143,7 +143,7 @@ template <typename Opr>
void
TimedProfiler
<
Opr
>::
preprocess
(
const
TensorLayoutArray
&
,
const
megdnn
::
SmallVector
<
DeviceTensorND
>&
,
UniqPtrWithCN
<
Opr
>&
,
megdnn
::
Workspace
&
,
std
::
array
<
TensorLayout
,
arity
>&
,
std
::
array
<
Device
TensorND
,
arity_in
>&
,
PreprocessFilter
<
Opr
>&
)
{
std
::
array
<
megdnn
::
TensorND
,
arity_in
>&
,
PreprocessFilter
<
Opr
>&
)
{
// Opr is neither convbias nor convolution.This function do nothing.
}
...
...
@@ -154,7 +154,7 @@ void TimedProfiler<megdnn::ConvBias>::preprocess(
const
SmallVector
<
DeviceTensorND
>&
flt_val
,
UniqPtrWithCN
<
megdnn
::
ConvBias
>&
megdnn_opr
,
megdnn
::
Workspace
&
mdn_workspace
,
std
::
array
<
TensorLayout
,
arity
>&
layouts
,
std
::
array
<
Device
TensorND
,
arity_in
>&
inp_val
,
std
::
array
<
megdnn
::
TensorND
,
arity_in
>&
inp_val
,
PreprocessFilter
<
megdnn
::
ConvBias
>&
prep_flt
)
{
if
(
!
preprocessed_layout
.
empty
())
{
auto
&&
pf
=
prep_flt
;
...
...
@@ -164,8 +164,7 @@ void TimedProfiler<megdnn::ConvBias>::preprocess(
pf
.
tensors
[
i
]
=
flt_val
[
i
].
as_megdnn
();
}
APPLY
(
megdnn_opr
->
exec_preprocess
(
args
...,
&
pf
,
mdn_workspace
),
std
::
forward_as_tuple
(
layouts
[
0
],
inp_val
[
1
].
as_megdnn
(),
inp_val
[
2
].
as_megdnn
()),
std
::
forward_as_tuple
(
layouts
[
0
],
inp_val
[
1
],
inp_val
[
2
]),
array_skip
<
arity_in
-
1
>
(
layouts
));
}
}
...
...
@@ -177,7 +176,7 @@ void TimedProfiler<megdnn::ConvolutionForward>::preprocess(
const
megdnn
::
SmallVector
<
DeviceTensorND
>&
flt_val
,
UniqPtrWithCN
<
megdnn
::
ConvolutionForward
>&
megdnn_opr
,
megdnn
::
Workspace
&
mdn_workspace
,
std
::
array
<
TensorLayout
,
arity
>&
layouts
,
std
::
array
<
Device
TensorND
,
arity_in
>&
inp_val
,
std
::
array
<
megdnn
::
TensorND
,
arity_in
>&
inp_val
,
PreprocessFilter
<
megdnn
::
ConvolutionForward
>&
prep_flt
)
{
if
(
!
preprocessed_layout
.
empty
())
{
auto
&&
pf
=
prep_flt
;
...
...
@@ -187,8 +186,7 @@ void TimedProfiler<megdnn::ConvolutionForward>::preprocess(
pf
.
tensors
[
i
]
=
flt_val
[
i
].
as_megdnn
();
}
APPLY
(
megdnn_opr
->
exec_preprocess
(
args
...,
&
pf
,
mdn_workspace
),
std
::
forward_as_tuple
(
layouts
[
0
],
inp_val
[
1
].
as_megdnn
()),
array_skip
<
2
>
(
layouts
));
std
::
forward_as_tuple
(
layouts
[
0
],
inp_val
[
1
]),
array_skip
<
2
>
(
layouts
));
}
}
...
...
@@ -259,8 +257,12 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
std
::
max
(
cn
.
get_free_mem
(),
cn
.
get_max_block_size_available
());
auto
align
=
cn
.
get_mem_addr_alignment
();
size_t
tot_size
=
align
;
for
(
int
i
=
0
;
i
<
arity
;
++
i
)
{
tot_size
+=
layouts
[
i
].
span
().
high_byte
+
align
;
for
(
size_t
i
=
0
;
i
<
arity
;
++
i
)
{
// if input tensornds are given, only consider output tensornds
if
(
param
.
inp_tensornds
!=
nullptr
)
{
if
(
i
>=
(
*
param
.
inp_tensornds
).
size
())
tot_size
+=
layouts
[
i
].
span
().
high_byte
+
align
;
}
}
for
(
const
auto
&
layout
:
preprocessed_layout
)
{
tot_size
+=
layout
.
span
().
high_byte
+
align
;
...
...
@@ -275,20 +277,34 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
#endif
// allocate input and output memory
std
::
array
<
DeviceTensorND
,
arity_in
>
inp_val
;
std
::
array
<
DeviceTensorND
,
arity_out
>
out_val
;
std
::
array
<
DeviceTensorND
,
arity_in
>
inp_dev
;
std
::
array
<
DeviceTensorND
,
arity_out
>
out_dev
;
std
::
array
<
megdnn
::
TensorND
,
arity_in
>
inp_val
;
std
::
array
<
megdnn
::
TensorND
,
arity_out
>
out_val
;
DeviceTensorND
workspace
;
for
(
int
i
=
0
;
i
<
arity_in
;
++
i
)
{
inp_val
[
i
].
comp_node
(
cn
).
dtype
(
layouts
[
i
].
dtype
).
resize
(
layouts
[
i
]);
if
(
param
.
inp_tensornds
!=
nullptr
)
{
// if inp_tensornds exists, then reusing it
for
(
int
i
=
0
;
i
<
arity_in
;
++
i
)
{
inp_val
[
i
]
=
(
*
param
.
inp_tensornds
)[
i
];
}
}
else
{
// inp_tensornds does not exist, create zero tensor with the same layout
for
(
int
i
=
0
;
i
<
arity_in
;
++
i
)
{
inp_dev
[
i
].
comp_node
(
cn
).
dtype
(
layouts
[
i
].
dtype
).
resize
(
layouts
[
i
]);
fill_zero_dev_tensor
(
inp_dev
[
i
]);
inp_val
[
i
]
=
inp_dev
[
i
].
as_megdnn
();
}
}
for
(
int
i
=
0
;
i
<
arity_out
;
++
i
)
{
out_
val
[
i
]
out_
dev
[
i
]
.
comp_node
(
cn
)
.
dtype
(
layouts
[
arity_in
+
i
].
dtype
)
.
resize
(
layouts
[
arity_in
+
i
]);
out_val
[
i
]
=
out_dev
[
i
].
as_megdnn
();
}
megdnn
::
Workspace
mdn_workspace
;
megdnn
::
Workspace
mdn_workspace
;
// allocate workspace
if
(
param
.
workspace
)
{
workspace
.
comp_node
(
cn
).
dtype
(
dtype
::
Byte
()).
resize
({
param
.
workspace
});
...
...
@@ -304,10 +320,6 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
preprocessed_layout
[
i
].
format
};
}
for
(
int
i
=
0
;
i
<
arity_in
;
++
i
)
{
fill_zero_dev_tensor
(
inp_val
[
i
]);
}
PreprocessFilter
<
Opr
>
prep_flt
;
preprocess
(
preprocessed_layout
,
flt_val
,
megdnn_opr
,
mdn_workspace
,
layouts
,
inp_val
,
...
...
@@ -322,13 +334,12 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
auto
&&
opr
=
_
(
megdnn_opr
);
PreprocessFilter
<
Opr
>*
pf
=
preprocessed_layout
.
empty
()
?
nullptr
:
&
prep_flt
;
APPLY
(
opr
->
exec
(
args
.
as_megdnn
()...,
pf
,
mdn_workspace
),
inp_val
,
out_val
);
APPLY
(
opr
->
exec
(
args
...,
pf
,
mdn_workspace
),
inp_val
,
out_val
);
},
/* else */
[
&
](
auto
_
)
{
APPLY
(
_
(
megdnn_opr
)
->
exec
(
args
.
as_megdnn
()...,
mdn_workspace
)
,
inp_val
,
out_val
);
APPLY
(
_
(
megdnn_opr
)
->
exec
(
args
.
..,
mdn_workspace
),
inp_val
,
out_val
);
});
}
ev_start
->
record
();
...
...
@@ -337,13 +348,11 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
auto
&&
opr
=
_
(
megdnn_opr
);
PreprocessFilter
<
Opr
>*
pf
=
preprocessed_layout
.
empty
()
?
nullptr
:
&
prep_flt
;
APPLY
(
opr
->
exec
(
args
.
as_megdnn
()...,
pf
,
mdn_workspace
),
inp_val
,
out_val
);
APPLY
(
opr
->
exec
(
args
...,
pf
,
mdn_workspace
),
inp_val
,
out_val
);
},
/* else */
[
&
](
auto
_
)
{
APPLY
(
_
(
megdnn_opr
)
->
exec
(
args
.
as_megdnn
()...,
mdn_workspace
),
inp_val
,
out_val
);
APPLY
(
_
(
megdnn_opr
)
->
exec
(
args
...,
mdn_workspace
),
inp_val
,
out_val
);
});
ev_end
->
record
();
...
...
@@ -370,10 +379,10 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
DeviceTensorStorage
storage
;
for
(
int
i
=
0
;
i
<
arity_in
;
++
i
)
{
inp_
val
[
i
].
reset
(
storage
,
TensorLayout
{});
inp_
dev
[
i
].
reset
(
storage
,
TensorLayout
{});
}
for
(
int
i
=
0
;
i
<
arity_out
;
++
i
)
{
out_
val
[
i
].
reset
(
storage
,
TensorLayout
{});
out_
dev
[
i
].
reset
(
storage
,
TensorLayout
{});
}
for
(
size_t
i
=
0
;
i
<
preprocessed_layout
.
size
();
i
++
)
{
flt_val
[
i
].
reset
(
storage
,
TensorLayout
{});
...
...
src/rdnn/include/megbrain/rdnn/algo_chooser.h
浏览文件 @
5bdc430e
...
...
@@ -60,13 +60,15 @@ public:
megdnn
::
param
::
ExecutionPolicy
m_execution_policy
;
bool
m_allow_weight_preprocess
;
const
AlgoChooserDesc
&
m_desc
;
SmallVector
<
megdnn
::
TensorND
>*
m_inputs
;
public:
MGE_WIN_DECLSPEC_FUC
AlgoChooserHelper
(
const
FixedTensorLayouts
&
layouts
,
Opr
*
megdnn_opr
,
const
std
::
string
&
param_str
,
const
CompNode
&
cn
,
const
megdnn
::
param
::
ExecutionPolicy
&
execution_policy
,
bool
allow_weight_preprocess
,
const
AlgoChooserDesc
&
desc
);
bool
allow_weight_preprocess
,
const
AlgoChooserDesc
&
desc
,
SmallVector
<
megdnn
::
TensorND
>*
inputs
=
nullptr
);
Opr
*
megdnn_opr
()
const
{
return
m_dnn_opr
;
}
...
...
@@ -93,6 +95,8 @@ public:
const
AlgoChooserDesc
&
desc
()
const
{
return
m_desc
;
}
SmallVector
<
megdnn
::
TensorND
>*
get_input
()
const
{
return
m_inputs
;
}
//! construct algo chain by heuristic
ImplExecutionPolicy
choose_by_heuristic
(
const
ExecutionStrategy
&
selected_strategy
)
const
;
...
...
src/rdnn/include/megbrain/rdnn/profiler.h
浏览文件 @
5bdc430e
...
...
@@ -122,6 +122,8 @@ public:
//! filled by profile()
mutable
double
actual_timeout
;
// input
SmallVector
<
megdnn
::
TensorND
>*
inp_tensornds
;
};
struct
Result
{
...
...
@@ -141,7 +143,7 @@ private:
const
megdnn
::
TensorLayoutArray
&
preprocessed_layout
,
const
SmallVector
<
DeviceTensorND
>&
flt_val
,
UniqPtrWithCN
<
Opr
>&
megdnn_opr
,
megdnn
::
Workspace
&
mdn_workspace
,
std
::
array
<
TensorLayout
,
arity
>&
layouts
,
std
::
array
<
Device
TensorND
,
arity_in
>&
inp_val
,
std
::
array
<
megdnn
::
TensorND
,
arity_in
>&
inp_val
,
PreprocessFilter
<
Opr
>&
prep_flt
);
static
TResult
prof_impl
(
const
TParam
&
raw_param
);
static
void
prof_init_device
(
const
TParam
&
raw_param
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录