Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
c2435d15
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
c2435d15
编写于
3月 22, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
perf(imperative): specialize adaptive pooling
GitOrigin-RevId: 01e14184580fc00e6725d2a7bf90ca374b47eccc
上级
8fcbe825
变更
8
显示空白变更内容
内联
并排
Showing
8 changed files
with
207 additions
and
36 deletions
+207
-36
dnn/src/common/basic_types.cpp
dnn/src/common/basic_types.cpp
+1
-1
imperative/python/megengine/functional/nn.py
imperative/python/megengine/functional/nn.py
+10
-18
imperative/python/src/tensor.cpp
imperative/python/src/tensor.cpp
+6
-2
imperative/python/src/tensor_utils.cpp
imperative/python/src/tensor_utils.cpp
+54
-0
imperative/python/src/tensor_utils.h
imperative/python/src/tensor_utils.h
+2
-0
imperative/src/impl/ops/adaptive_pooling.cpp
imperative/src/impl/ops/adaptive_pooling.cpp
+129
-0
imperative/src/impl/ops/specializations.cpp
imperative/src/impl/ops/specializations.cpp
+0
-14
src/core/include/megbrain/ir/ops.td
src/core/include/megbrain/ir/ops.td
+5
-1
未找到文件。
dnn/src/common/basic_types.cpp
浏览文件 @
c2435d15
...
...
@@ -191,7 +191,7 @@ bool TensorShape::is_empty() const {
return
true
;
}
}
return
false
;
return
ndim
==
0
;
}
/* ===================== TensorLayout ===================== */
...
...
imperative/python/megengine/functional/nn.py
浏览文件 @
c2435d15
...
...
@@ -11,7 +11,12 @@ from functools import lru_cache
from
typing
import
NamedTuple
,
Optional
,
Sequence
,
Tuple
,
Union
from
..core
import
_config
from
..core._imperative_rt.core2
import
Const
,
apply
,
dtype_promotion
from
..core._imperative_rt.core2
import
(
Const
,
adaptive_pool2d_cpp
,
apply
,
dtype_promotion
,
)
from
..core._imperative_rt.ops
import
SubgraphBuilder
as
_SubgraphBuilder
from
..core._imperative_rt.ops
import
get_global_rng_seed
as
_get_global_rng_seed
from
..core.ops
import
builtin
...
...
@@ -691,19 +696,12 @@ def adaptive_max_pool2d(
Args:
inp: input tensor.
oshp: OH, OW)` size of the output shape.
oshp:
`(
OH, OW)` size of the output shape.
Returns:
output tensor.
"""
if
isinstance
(
oshp
,
int
):
oshp
=
(
oshp
,
oshp
)
conv_format
=
_config
.
_get_actual_op_param
(
"NCHW"
,
_config
.
__conv_format
)
op
=
builtin
.
AdaptivePooling
(
mode
=
"max"
,
format
=
conv_format
,)
oshp
=
astensor1d
(
oshp
,
inp
,
dtype
=
"int32"
,
device
=
inp
.
device
)
(
output
,)
=
apply
(
op
,
inp
,
oshp
)
return
output
return
adaptive_pool2d_cpp
(
inp
,
oshp
,
"MAX"
)
def
adaptive_avg_pool2d
(
...
...
@@ -715,18 +713,12 @@ def adaptive_avg_pool2d(
Args:
inp: input tensor.
oshp: OH, OW)` size of the output shape.
oshp:
`(
OH, OW)` size of the output shape.
Returns:
output tensor.
"""
if
isinstance
(
oshp
,
int
):
oshp
=
(
oshp
,
oshp
)
op
=
builtin
.
AdaptivePooling
(
mode
=
"average"
,
format
=
"NCHW"
,)
oshp
=
astensor1d
(
oshp
,
inp
,
dtype
=
"int32"
,
device
=
inp
.
device
)
(
output
,)
=
apply
(
op
,
inp
,
oshp
)
return
output
return
adaptive_pool2d_cpp
(
inp
,
oshp
,
"AVERAGE"
)
def
deformable_psroi_pooling
(
...
...
imperative/python/src/tensor.cpp
浏览文件 @
c2435d15
...
...
@@ -430,6 +430,7 @@ WRAP_FUNC_PY35(squeeze_cpp);
WRAP_FUNC_PY35
(
transpose_cpp
);
WRAP_FUNC_PY35
(
broadcast_cpp
);
WRAP_FUNC_PY35
(
reshape_cpp
);
WRAP_FUNC_PY35
(
adaptive_pool2d_cpp
);
WRAP_FUNC_PY35
(
Const
);
WRAP_FUNC_PY35
(
astype_cpp
);
WRAP_FUNC_PY35
(
convert_single_value_cpp
);
...
...
@@ -584,6 +585,7 @@ void init_tensor(py::module m) {
MGE_PY_INTERFACE
(
transpose_cpp
,
transpose_cpp
),
MGE_PY_INTERFACE
(
broadcast_cpp
,
broadcast_cpp
),
MGE_PY_INTERFACE
(
reshape_cpp
,
reshape_cpp
),
MGE_PY_INTERFACE
(
adaptive_pool2d_cpp
,
adaptive_pool2d_cpp
),
MGE_PY_INTERFACE
(
Const
,
Const
),
MGE_PY_INTERFACE
(
astype_cpp
,
astype_cpp
),
MGE_PY_INTERFACE
(
convert_single_value_cpp
,
convert_single_value_cpp
),
...
...
@@ -991,8 +993,10 @@ void init_tensor(py::module m) {
m
.
def
(
"is_tracing_module"
,
[
=
]
{
return
get_module_trace
()
->
enabled
();
});
m
.
def
(
"set_module_trace_hook"
,
[](
py
::
function
function
)
{
module_trace_hook
=
function
;
});
m
.
def
(
"set_module_trace_hook"
,
[](
py
::
function
function
)
{
module_trace_hook
=
function
;
module_trace_hook
.
inc_ref
();
});
m
.
def
(
"begin_record_values"
,
[]
{
Value
::
begin_record_values
();
});
...
...
imperative/python/src/tensor_utils.cpp
浏览文件 @
c2435d15
...
...
@@ -948,6 +948,7 @@ std::tuple<std::vector<int32_t>, bool> tuple2vector(py::object shape) {
py
::
tuple
tup
=
py
::
reinterpret_borrow
<
py
::
tuple
>
(
shape
);
for
(
size_t
i
=
0
;
i
<
tup
.
size
();
++
i
)
{
if
(
!
PyLong_Check
(
tup
[
i
].
ptr
()))
{
shp
.
clear
();
return
{
shp
,
false
};
}
else
{
shp
.
push_back
(
tup
[
i
].
cast
<
int32_t
>
());
...
...
@@ -1108,6 +1109,52 @@ py::object _reshape_cpp(py::handle inp_hdl, py::handle args) {
return
ret
[
0
];
}
// Fast-path C++ implementation backing Python's adaptive_{max,avg}_pool2d.
// Builds an AdaptivePooling OpDef from (input, target shape, mode string) and
// applies it, avoiding Python-side tensor construction when the target shape
// is a plain tuple of ints.
//
// inp_hdl:       input tensor (Python object).
// shape_val_hdl: target output shape — an int (broadcast to (n, n)) or a tuple.
// pool_mode_hdl: Python str, "MAX" or "AVERAGE"; anything else falls back to MAX.
py::object _adaptive_pool2d_cpp(
        py::handle inp_hdl, py::handle shape_val_hdl, py::handle pool_mode_hdl) {
    py::object shape_hdl = py::reinterpret_borrow<py::object>(shape_val_hdl);
    py::list shps(0);
    if (!PyTuple_Check(shape_val_hdl.ptr())) {
        // Scalar target size: duplicate it into (OH, OW).
        // NOTE(review): PyLong_AsLong error (-1 with an exception set) is not
        // checked here — a non-int, non-tuple argument slips through; verify
        // callers only pass int or tuple.
        shps.append(PyLong_AsLong(shape_val_hdl.ptr()));
        shps.append(PyLong_AsLong(shape_val_hdl.ptr()));
        shape_hdl = py::reinterpret_borrow<py::object>(shps);
    }

    // Normalize to a shape tuple; if that fails (e.g. symbolic shape), keep
    // the original object and let the slow path handle it.
    py::object shape_tuple;
    try {
        shape_tuple = _make_shape_tuple(shape_hdl);
    } catch (py::error_already_set& err) {
        shape_tuple = py::reinterpret_borrow<py::object>(shape_hdl);
    }

    // Map the mode string onto the megdnn enum; default is MAX.
    auto mode_string = pool_mode_hdl.cast<std::string>();
    ::megdnn::param::AdaptivePooling::Mode pool_mode =
            ::megdnn::param::AdaptivePooling::Mode::MAX;
    if (mode_string.compare(std::string("AVERAGE")) == 0) {
        pool_mode = ::megdnn::param::AdaptivePooling::Mode::AVERAGE;
    }

    // Fast path only when the shape is a static vector of ints AND the input
    // qualifies (enable_fastpath); otherwise the shape is passed as a tensor.
    auto [shape, fastpath] = tuple2vector(shape_tuple);
    fastpath &= enable_fastpath(inp_hdl);
    std::shared_ptr<OpDef> op;
    std::vector<PyObject*> p;
    py::object shape_tensor;
    op = AdaptivePooling::make(
            pool_mode, ::megdnn::param::AdaptivePooling::Format::NCHW, shape);
    if (fastpath) {
        // apply(op, input) — static shape is baked into the OpDef.
        p.resize(2);
    } else {
        // apply(op, input, shape_tensor) — shape materialized as int32 tensor
        // on the input's device.
        p.resize(3);
        shape_tensor = _astensor1d_cpp(
                shape_hdl, py::cast((mgb::DType)dtype::Int32()),
                getattr(inp_hdl, "device"), inp_hdl);
        p[2] = shape_tensor.ptr();
    }
    py::object Op = py::cast(op);
    p[0] = Op.ptr();
    p[1] = inp_hdl.ptr();
    // py_apply returns a tuple of outputs; adaptive pooling yields one tensor.
    py::tuple ret = py::reinterpret_steal<py::object>(
            py_apply(NULL, p.data(), p.size()));
    return ret[0];
}
py
::
object
_getitem_cpp
(
py
::
handle
inp_hdl
,
py
::
handle
idx_hdl
)
{
py
::
tuple
try_res
=
_try_cond_take
(
inp_hdl
,
idx_hdl
);
if
(
try_res
.
size
()
==
2
)
{
...
...
@@ -1506,6 +1553,13 @@ PyObject* reshape_cpp(PyObject* self, PyObject* const* args, size_t nargs) {
PYEXT17_TRANSLATE_EXC_RET
(
nullptr
)
}
// METH_FASTCALL entry point exposed to Python as adaptive_pool2d_cpp.
// Forwards (input, oshp, mode) to _adaptive_pool2d_cpp and translates C++
// exceptions to Python via PYEXT17_TRANSLATE_EXC_RET (returns nullptr on error).
// NOTE(review): nargs is not validated here — assumes the Python caller always
// passes exactly 3 arguments.
PyObject* adaptive_pool2d_cpp(PyObject* self, PyObject* const* args, size_t nargs) {
    try {
        return _adaptive_pool2d_cpp(args[0], args[1], args[2]).release().ptr();
    }
    PYEXT17_TRANSLATE_EXC_RET(nullptr)
}
PyObject
*
Const
(
PyObject
*
self
,
PyObject
*
const
*
args
,
size_t
nargs
)
{
try
{
return
_Const
(
args
[
0
],
args
[
1
],
args
[
2
],
args
[
3
]).
release
().
ptr
();
...
...
imperative/python/src/tensor_utils.h
浏览文件 @
c2435d15
...
...
@@ -24,6 +24,8 @@ PyObject* broadcast_cpp(PyObject* self, PyObject* const* args, size_t nargs);
PyObject
*
reshape_cpp
(
PyObject
*
self
,
PyObject
*
const
*
args
,
size_t
nargs
);
PyObject
*
adaptive_pool2d_cpp
(
PyObject
*
self
,
PyObject
*
const
*
args
,
size_t
nargs
);
PyObject
*
Const
(
PyObject
*
self
,
PyObject
*
const
*
args
,
size_t
nargs
);
PyObject
*
astype_cpp
(
PyObject
*
self
,
PyObject
*
const
*
args
,
size_t
nargs
);
...
...
imperative/src/impl/ops/adaptive_pooling.cpp
0 → 100644
浏览文件 @
c2435d15
#include "megbrain/opr/dnn/adaptive_pooling.h"
#include "../algo_chooser.h"
#include "../blob_manager_impl.h"
#include "../dnn_op_helper.h"
#include "../op_trait.h"
#include "megbrain/imperative/ops/autogen.h"
#include "megbrain/opr/io.h"
namespace
mgb
::
imperative
{
namespace
{
namespace
adaptive_pooling
{
auto
apply_on_var_node
(
const
OpDef
&
def
,
const
VarNodeArray
&
inputs
)
{
auto
&&
pool
=
static_cast
<
const
AdaptivePooling
&>
(
def
);
OperatorNodeConfig
config
{
pool
.
make_name
()};
size_t
nr_inp
=
inputs
.
size
();
if
(
nr_inp
>
1
)
{
return
opr
::
AdaptivePooling
::
make
(
inputs
[
0
],
inputs
[
1
],
pool
.
param
(),
config
);
}
HostTensorND
hv
=
HostTensorND
(
inputs
[
0
]
->
comp_node
(),
{
2
},
dtype
::
Int32
());
auto
*
ptr
=
hv
.
ptr
<
dt_int32
>
();
ptr
[
0
]
=
pool
.
shape
[
0
];
ptr
[
1
]
=
pool
.
shape
[
1
];
auto
graph
=
inputs
[
0
]
->
owner_graph
();
auto
target_shape
=
opr
::
ImmutableTensor
::
make
(
*
graph
,
hv
,
config
);
return
opr
::
AdaptivePooling
::
make
(
inputs
[
0
],
target_shape
,
pool
.
param
(),
config
);
}
// Infer the output layout of AdaptivePooling without executing it.
// Returns ({desc}, validated): validated is false when the layout cannot be
// fully determined yet (empty input layout, or shape tensor value not known).
// Output is always NCHW rank-4: (N, C) copied from the source, (OH, OW) taken
// either from the op's static shape (1 input) or from the value of the shape
// tensor (2 inputs).
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
        const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
    auto&& pool = static_cast<const AdaptivePooling&>(def);
    size_t nr_inp = inputs.size();
    auto&& src = inputs[0];
    TensorLayout dst_layout(src.layout.dtype);
    if (src.layout.is_empty()) {
        // Source layout unknown: report dtype-only layout, not validated.
        return {{{TensorLayout(src.layout.dtype), src.comp_node}}, false};
    }
    dst_layout.ndim = 4u;
    if (nr_inp == 1) {
        // Static target shape stored on the op itself.
        dst_layout[0] = src.layout[0];
        dst_layout[1] = src.layout[1];
        dst_layout[2] = pool.shape[0];
        dst_layout[3] = pool.shape[1];
    } else {
        auto&& tshp = inputs[1];
        if (tshp.value.empty()) {
            // Shape tensor exists but its value is not yet computed.
            return {{{TensorLayout(src.layout.dtype), src.comp_node}}, false};
        }
        mgb_assert(
                tshp.layout.ndim == 1,
                "target shape of AdaptivePooling expects ndim=1; got ndim=%lu actually",
                tshp.layout.ndim);
        dst_layout[0] = src.layout[0];
        dst_layout[1] = src.layout[1];
        // Read (OH, OW) out of the shape tensor's int32 value.
        auto* ptr = tshp.value.ptr<dt_int32>();
        dst_layout[2] = ptr[0];
        dst_layout[3] = ptr[1];
    }
    dst_layout.init_contiguous_stride();
    return {{{dst_layout, src.comp_node}}, true};
}
// Eagerly execute AdaptivePooling by specializing it into a plain megdnn
// Pooling with stride/window derived from the input and output sizes.
// If the output layout was not validated upstream, recompute it here from
// the shape tensor (2 inputs) or the op's static shape (1 input).
SmallVector<TensorPtr> apply_on_physical_tensor(
        const OpDef& def, const SmallVector<TensorPtr>& inputs,
        SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) {
    auto&& pool = static_cast<const AdaptivePooling&>(def);
    auto&& cn = inputs[0]->comp_node();
    using TensorND = megdnn::TensorND;
    auto&& src_layout = inputs[0]->layout();
    TensorLayout dst_layout = output_descs[0].layout;
    if (!validated) {
        // Re-derive the NCHW output layout: N, C from the source, H, W from
        // either the shape tensor or the op attribute.
        TensorShape tshp;
        dst_layout[0] = src_layout[0];
        dst_layout[1] = src_layout[1];
        if (inputs.size() == 2) {
            auto&& tshp_nd = inputs[1];
            cg::copy_tensor_value_to_shape(
                    tshp, tshp_nd->get_value().proxy_to_default_cpu());
            dst_layout[2] = tshp[0];
            dst_layout[3] = tshp[1];
        } else {
            dst_layout[2] = pool.shape[0];
            dst_layout[3] = pool.shape[1];
        }
        dst_layout.init_contiguous_stride();
    }
    size_t IH = src_layout[2], IW = src_layout[3], OH = dst_layout[2],
           OW = dst_layout[3];
    DnnOprCaller<megdnn::Pooling> dnn_opr(cn);
    auto&& param = dnn_opr.op->param();
    param.mode = pool.mode;
    param.format = pool.format;
    param.pad_h = param.pad_w = 0;
    // Adaptive pooling specialization: stride = floor(in/out) via integer
    // division (no std::floor needed on size_t operands), window sized so the
    // last window ends exactly at the input edge.
    // NOTE(review): assumes OH <= IH and OW <= IW — TODO confirm upstream
    // checks reject upsampling target shapes.
    param.stride_h = IH / OH;
    param.stride_w = IW / OW;
    param.window_h = IH - (OH - 1) * param.stride_h;
    param.window_w = IW - (OW - 1) * param.stride_w;
    TensorND src = inputs[0]->dnn_tensor();
    DeviceTensorND dst =
            BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout);
    // Pick an execution algorithm and (if required) allocate its workspace.
    size_t sz = setup_algo<megdnn::Pooling>(
            {src_layout, dst_layout}, dnn_opr.op.get(), 0, false, false, cn,
            ::megdnn::param::ExecutionPolicy{}, false);
    megdnn::Workspace dnn_wk;
    if (sz) {
        TensorLayout w_layout({sz}, dtype::Byte());
        dnn_wk = dnn_opr.create_workspace(w_layout);
    }
    dnn_opr.op->exec(src, dst.as_megdnn(), dnn_wk);
    return {Tensor::make(dst)};
}
// Register the imperative trait implementations for AdaptivePooling.
// fallback() fills any remaining trait entries with the default behavior.
OP_TRAIT_REG(AdaptivePooling, AdaptivePooling)
        .apply_on_var_node(apply_on_var_node)
        .infer_output_attrs_fallible(infer_output_attrs_fallible)
        .apply_on_physical_tensor(apply_on_physical_tensor)
        .fallback();
}  // namespace adaptive_pooling
}  // namespace
}  // namespace mgb::imperative
imperative/src/impl/ops/specializations.cpp
浏览文件 @
c2435d15
...
...
@@ -293,20 +293,6 @@ OP_TRAIT_REG(TopK, TopK).apply_on_var_node(apply_on_var_node).fallback();
}
// namespace top_k
}
// namespace
namespace
{
namespace
adaptive_pooling
{
auto
apply_on_var_node
(
const
OpDef
&
def
,
const
VarNodeArray
&
inputs
)
{
auto
&&
pool
=
static_cast
<
const
AdaptivePooling
&>
(
def
);
OperatorNodeConfig
config
{
pool
.
make_name
()};
return
opr
::
AdaptivePooling
::
make
(
inputs
[
0
],
inputs
[
1
],
pool
.
param
(),
config
);
}
OP_TRAIT_REG
(
AdaptivePooling
,
AdaptivePooling
)
.
apply_on_var_node
(
apply_on_var_node
)
.
fallback
();
}
// namespace adaptive_pooling
}
// namespace
namespace
{
namespace
batch_conv_bias
{
auto
apply_on_var_node
(
const
OpDef
&
def
,
const
VarNodeArray
&
inputs
)
{
...
...
src/core/include/megbrain/ir/ops.td
浏览文件 @
c2435d15
...
...
@@ -69,7 +69,11 @@ def GroupLocal: MgbHashableOp<"GroupLocal", [ConvolutionParam]>;
def Pooling: MgbHashableOp<"Pooling", [PoolingParam, ExecutionPolicyParamBase<"policy">]>;
def AdaptivePooling : MgbHashableOp<"AdaptivePooling", [AdaptivePoolingParam]>;
def AdaptivePooling : MgbHashableOp<"AdaptivePooling", [AdaptivePoolingParam]> {
let extraArguments = (ins
MgbArrayAttr<MgbI32Attr>:$shape
);
}
def ROIPooling: MgbHashableOp<"ROIPooling", [ROIPoolingParam]>;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录