提交 c2435d15 编写于 作者: M Megvii Engine Team

perf(imperative): specialize adaptive pooling

GitOrigin-RevId: 01e14184580fc00e6725d2a7bf90ca374b47eccc
上级 8fcbe825
......@@ -191,7 +191,7 @@ bool TensorShape::is_empty() const {
return true;
}
}
return false;
return ndim == 0;
}
/* ===================== TensorLayout ===================== */
......
......@@ -11,7 +11,12 @@ from functools import lru_cache
from typing import NamedTuple, Optional, Sequence, Tuple, Union
from ..core import _config
from ..core._imperative_rt.core2 import Const, apply, dtype_promotion
from ..core._imperative_rt.core2 import (
Const,
adaptive_pool2d_cpp,
apply,
dtype_promotion,
)
from ..core._imperative_rt.ops import SubgraphBuilder as _SubgraphBuilder
from ..core._imperative_rt.ops import get_global_rng_seed as _get_global_rng_seed
from ..core.ops import builtin
......@@ -691,19 +696,12 @@ def adaptive_max_pool2d(
Args:
inp: input tensor.
oshp: OH, OW)` size of the output shape.
oshp: `(OH, OW)` size of the output shape.
Returns:
output tensor.
"""
if isinstance(oshp, int):
oshp = (oshp, oshp)
conv_format = _config._get_actual_op_param("NCHW", _config.__conv_format)
op = builtin.AdaptivePooling(mode="max", format=conv_format,)
oshp = astensor1d(oshp, inp, dtype="int32", device=inp.device)
(output,) = apply(op, inp, oshp)
return output
return adaptive_pool2d_cpp(inp, oshp, "MAX")
def adaptive_avg_pool2d(
......@@ -715,18 +713,12 @@ def adaptive_avg_pool2d(
Args:
inp: input tensor.
oshp: OH, OW)` size of the output shape.
oshp: `(OH, OW)` size of the output shape.
Returns:
output tensor.
"""
if isinstance(oshp, int):
oshp = (oshp, oshp)
op = builtin.AdaptivePooling(mode="average", format="NCHW",)
oshp = astensor1d(oshp, inp, dtype="int32", device=inp.device)
(output,) = apply(op, inp, oshp)
return output
return adaptive_pool2d_cpp(inp, oshp, "AVERAGE")
def deformable_psroi_pooling(
......
......@@ -430,6 +430,7 @@ WRAP_FUNC_PY35(squeeze_cpp);
WRAP_FUNC_PY35(transpose_cpp);
WRAP_FUNC_PY35(broadcast_cpp);
WRAP_FUNC_PY35(reshape_cpp);
WRAP_FUNC_PY35(adaptive_pool2d_cpp);
WRAP_FUNC_PY35(Const);
WRAP_FUNC_PY35(astype_cpp);
WRAP_FUNC_PY35(convert_single_value_cpp);
......@@ -584,6 +585,7 @@ void init_tensor(py::module m) {
MGE_PY_INTERFACE(transpose_cpp, transpose_cpp),
MGE_PY_INTERFACE(broadcast_cpp, broadcast_cpp),
MGE_PY_INTERFACE(reshape_cpp, reshape_cpp),
MGE_PY_INTERFACE(adaptive_pool2d_cpp, adaptive_pool2d_cpp),
MGE_PY_INTERFACE(Const, Const),
MGE_PY_INTERFACE(astype_cpp, astype_cpp),
MGE_PY_INTERFACE(convert_single_value_cpp, convert_single_value_cpp),
......@@ -991,8 +993,10 @@ void init_tensor(py::module m) {
m.def("is_tracing_module", [=] { return get_module_trace()->enabled(); });
m.def("set_module_trace_hook",
[](py::function function) { module_trace_hook = function; });
m.def("set_module_trace_hook", [](py::function function) {
module_trace_hook = function;
module_trace_hook.inc_ref();
});
m.def("begin_record_values", [] { Value::begin_record_values(); });
......
......@@ -948,6 +948,7 @@ std::tuple<std::vector<int32_t>, bool> tuple2vector(py::object shape) {
py::tuple tup = py::reinterpret_borrow<py::tuple>(shape);
for (size_t i = 0; i < tup.size(); ++i) {
if (!PyLong_Check(tup[i].ptr())) {
shp.clear();
return {shp, false};
} else {
shp.push_back(tup[i].cast<int32_t>());
......@@ -1108,6 +1109,52 @@ py::object _reshape_cpp(py::handle inp_hdl, py::handle args) {
return ret[0];
}
/*!
 * \brief Build an AdaptivePooling op and apply it to \p inp_hdl.
 *
 * \param inp_hdl        input tensor; its `device` attribute is read when the
 *                       target shape has to be materialised as a tensor.
 * \param shape_val_hdl  target output size. A tuple is used as given; a value
 *                       that PyLong_AsLong can convert is expanded to a square
 *                       (v, v); anything else (e.g. a list) is forwarded
 *                       unchanged to the generic shape handling below.
 * \param pool_mode_hdl  Python str; "AVERAGE" selects average pooling, any
 *                       other value selects max pooling.
 * \return the first element of the tuple returned by py_apply.
 *
 * The op is always created with the NCHW format. When the shape resolves to
 * exactly two Python ints and enable_fastpath(inp_hdl) holds, the shape travels
 * only as an op attribute and (op, inp) are applied; otherwise an Int32 shape
 * tensor is created with _astensor1d_cpp and passed as an extra input.
 */
py::object _adaptive_pool2d_cpp(
        py::handle inp_hdl, py::handle shape_val_hdl, py::handle pool_mode_hdl) {
    py::object shape_hdl = py::reinterpret_borrow<py::object>(shape_val_hdl);
    if (!PyTuple_Check(shape_val_hdl.ptr())) {
        const long side = PyLong_AsLong(shape_val_hdl.ptr());
        if (side == -1 && PyErr_Occurred()) {
            // Not convertible to an integer (list, non-scalar tensor, ...):
            // drop the pending error instead of appending a bogus -1 and let
            // the generic handling below deal with the original object.
            PyErr_Clear();
        } else {
            py::list square(0);
            square.append(side);
            square.append(side);
            shape_hdl = py::reinterpret_borrow<py::object>(square);
        }
    }

    py::object shape_tuple;
    try {
        shape_tuple = _make_shape_tuple(shape_hdl);
    } catch (py::error_already_set&) {
        // Could not be normalised into a tuple; keep the object as-is so that
        // tuple2vector decides whether the fast path is usable.
        shape_tuple = py::reinterpret_borrow<py::object>(shape_hdl);
    }

    auto mode_string = pool_mode_hdl.cast<std::string>();
    ::megdnn::param::AdaptivePooling::Mode pool_mode =
            ::megdnn::param::AdaptivePooling::Mode::MAX;
    if (mode_string.compare(std::string("AVERAGE")) == 0) {
        pool_mode = ::megdnn::param::AdaptivePooling::Mode::AVERAGE;
    }

    auto [shape, fastpath] = tuple2vector(shape_tuple);
    fastpath &= enable_fastpath(inp_hdl);
    // The operator implementation indexes shape[0] and shape[1] whenever no
    // shape tensor is supplied, so only a two-element shape may take the fast
    // path.
    fastpath = fastpath && shape.size() == 2;

    std::shared_ptr<OpDef> op = AdaptivePooling::make(
            pool_mode, ::megdnn::param::AdaptivePooling::Format::NCHW, shape);

    std::vector<PyObject*> p;
    py::object shape_tensor;  // must outlive py_apply: p[2] borrows its pointer
    if (fastpath) {
        p.resize(2);
    } else {
        p.resize(3);
        shape_tensor = _astensor1d_cpp(
                shape_hdl, py::cast((mgb::DType)dtype::Int32()),
                getattr(inp_hdl, "device"), inp_hdl);
        p[2] = shape_tensor.ptr();
    }
    py::object Op = py::cast(op);
    p[0] = Op.ptr();
    p[1] = inp_hdl.ptr();
    py::tuple ret =
            py::reinterpret_steal<py::object>(py_apply(nullptr, p.data(), p.size()));
    return ret[0];
}
py::object _getitem_cpp(py::handle inp_hdl, py::handle idx_hdl) {
py::tuple try_res = _try_cond_take(inp_hdl, idx_hdl);
if (try_res.size() == 2) {
......@@ -1506,6 +1553,13 @@ PyObject* reshape_cpp(PyObject* self, PyObject* const* args, size_t nargs) {
PYEXT17_TRANSLATE_EXC_RET(nullptr)
}
// Python-facing entry point: forwards the first three positional arguments
// (input, target shape, pooling mode) to _adaptive_pool2d_cpp and hands the
// resulting object to the caller as a new reference. C++ exceptions are turned
// into a nullptr return by PYEXT17_TRANSLATE_EXC_RET.
PyObject* adaptive_pool2d_cpp(PyObject* self, PyObject* const* args, size_t nargs) {
    try {
        py::object pooled = _adaptive_pool2d_cpp(args[0], args[1], args[2]);
        return pooled.release().ptr();
    }
    PYEXT17_TRANSLATE_EXC_RET(nullptr)
}
PyObject* Const(PyObject* self, PyObject* const* args, size_t nargs) {
try {
return _Const(args[0], args[1], args[2], args[3]).release().ptr();
......
......@@ -24,6 +24,8 @@ PyObject* broadcast_cpp(PyObject* self, PyObject* const* args, size_t nargs);
PyObject* reshape_cpp(PyObject* self, PyObject* const* args, size_t nargs);
PyObject* adaptive_pool2d_cpp(PyObject* self, PyObject* const* args, size_t nargs);
PyObject* Const(PyObject* self, PyObject* const* args, size_t nargs);
PyObject* astype_cpp(PyObject* self, PyObject* const* args, size_t nargs);
......
#include "megbrain/opr/dnn/adaptive_pooling.h"
#include "../algo_chooser.h"
#include "../blob_manager_impl.h"
#include "../dnn_op_helper.h"
#include "../op_trait.h"
#include "megbrain/imperative/ops/autogen.h"
#include "megbrain/opr/io.h"
namespace mgb::imperative {
namespace {
namespace adaptive_pooling {
auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
auto&& pool = static_cast<const AdaptivePooling&>(def);
OperatorNodeConfig config{pool.make_name()};
size_t nr_inp = inputs.size();
if (nr_inp > 1) {
return opr::AdaptivePooling::make(inputs[0], inputs[1], pool.param(), config);
}
HostTensorND hv = HostTensorND(inputs[0]->comp_node(), {2}, dtype::Int32());
auto* ptr = hv.ptr<dt_int32>();
ptr[0] = pool.shape[0];
ptr[1] = pool.shape[1];
auto graph = inputs[0]->owner_graph();
auto target_shape = opr::ImmutableTensor::make(*graph, hv, config);
return opr::AdaptivePooling::make(inputs[0], target_shape, pool.param(), config);
}
/*!
 * \brief Try to infer the output descriptor of AdaptivePooling.
 *
 * \return a single descriptor plus a flag telling whether it is final. The
 *         flag is false (and the layout carries only the dtype) when the
 *         source layout reports is_empty() or when a target-shape input is
 *         present but its value is not available.
 *
 * The inferred layout is 4-dimensional: dims 0 and 1 are copied from the
 * source, dims 2 and 3 come from the target shape (op attribute when there is
 * one input, value of the second input otherwise).
 */
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
        const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
    auto&& pool = static_cast<const AdaptivePooling&>(def);
    auto&& src = inputs[0];
    if (src.layout.is_empty()) {
        return {{{TensorLayout(src.layout.dtype), src.comp_node}}, false};
    }

    TensorLayout dst_layout(src.layout.dtype);
    dst_layout.ndim = 4u;
    dst_layout[0] = src.layout[0];
    dst_layout[1] = src.layout[1];

    if (inputs.size() == 1) {
        // Both entries are read below; reject a malformed attribute up front.
        mgb_assert(
                pool.shape.size() == 2,
                "AdaptivePooling expects a target shape of 2 elements (OH, OW); got "
                "%zu",
                pool.shape.size());
        dst_layout[2] = pool.shape[0];
        dst_layout[3] = pool.shape[1];
    } else {
        auto&& tshp = inputs[1];
        if (tshp.value.empty()) {
            return {{{TensorLayout(src.layout.dtype), src.comp_node}}, false};
        }
        // ndim is a size_t, hence %zu.
        mgb_assert(
                tshp.layout.ndim == 1,
                "target shape of AdaptivePooling expects ndim=1; got ndim=%zu actually",
                tshp.layout.ndim);
        // Two values are read from the shape tensor below.
        mgb_assert(
                tshp.layout[0] == 2,
                "target shape of AdaptivePooling expects 2 elements (OH, OW); got %zu",
                tshp.layout[0]);
        auto* ptr = tshp.value.ptr<dt_int32>();
        dst_layout[2] = ptr[0];
        dst_layout[3] = ptr[1];
    }
    dst_layout.init_contiguous_stride();
    return {{{dst_layout, src.comp_node}}, true};
}
/*!
 * \brief Execute AdaptivePooling eagerly by running a megdnn Pooling operator.
 *
 * \param inputs        inputs[0] is the source tensor; an optional inputs[1]
 *                      holds the target shape.
 * \param output_descs  output_descs[0].layout is used as the output layout
 *                      when \p validated is true.
 * \param validated     when false the output layout is recomputed here from
 *                      the source layout and the target shape.
 *
 * The adaptive pooling is expressed as a fixed pooling with zero padding,
 * stride = I / O and window = I - (O - 1) * stride along each spatial axis.
 * Dims 2 and 3 are treated as the spatial axes.
 */
SmallVector<TensorPtr> apply_on_physical_tensor(
        const OpDef& def, const SmallVector<TensorPtr>& inputs,
        SmallVector<LogicalTensorDesc>& output_descs, const bool& validated) {
    auto&& pool = static_cast<const AdaptivePooling&>(def);
    auto&& cn = inputs[0]->comp_node();

    using TensorND = megdnn::TensorND;
    auto&& src_layout = inputs[0]->layout();
    TensorLayout dst_layout = output_descs[0].layout;
    if (!validated) {
        // A non-validated descriptor may carry only the dtype (inference
        // returns a bare TensorLayout(dtype) in that case), so the rank has to
        // be set explicitly before the dims are filled in.
        dst_layout.ndim = 4u;
        dst_layout[0] = src_layout[0];
        dst_layout[1] = src_layout[1];
        if (inputs.size() == 2) {
            TensorShape tshp;
            auto&& tshp_nd = inputs[1];
            cg::copy_tensor_value_to_shape(
                    tshp, tshp_nd->get_value().proxy_to_default_cpu());
            mgb_assert(
                    tshp.ndim == 2,
                    "target shape of AdaptivePooling expects 2 elements (OH, OW); got "
                    "%zu",
                    tshp.ndim);
            dst_layout[2] = tshp[0];
            dst_layout[3] = tshp[1];
        } else {
            mgb_assert(
                    pool.shape.size() == 2,
                    "AdaptivePooling expects a target shape of 2 elements (OH, OW); "
                    "got %zu",
                    pool.shape.size());
            dst_layout[2] = pool.shape[0];
            dst_layout[3] = pool.shape[1];
        }
        dst_layout.init_contiguous_stride();
    }

    size_t IH = src_layout[2], IW = src_layout[3], OH = dst_layout[2],
           OW = dst_layout[3];
    // OH / OW are used as divisors below.
    mgb_assert(
            OH > 0 && OW > 0,
            "AdaptivePooling expects a positive output size; got OH=%zu OW=%zu", OH,
            OW);

    DnnOprCaller<megdnn::Pooling> dnn_opr(cn);
    auto&& param = dnn_opr.op->param();
    param.mode = pool.mode;
    param.format = pool.format;
    param.pad_h = param.pad_w = 0;
    // Unsigned integer division already rounds down; no floor() needed.
    param.stride_h = IH / OH;
    param.stride_w = IW / OW;
    param.window_h = IH - (OH - 1) * param.stride_h;
    param.window_w = IW - (OW - 1) * param.stride_w;

    TensorND src = inputs[0]->dnn_tensor();
    DeviceTensorND dst =
            BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout);

    size_t sz = setup_algo<megdnn::Pooling>(
            {src_layout, dst_layout}, dnn_opr.op.get(), 0, false, false, cn,
            ::megdnn::param::ExecutionPolicy{}, false);

    // Only allocate a workspace when the chosen algorithm asks for one.
    megdnn::Workspace dnn_wk;
    if (sz) {
        TensorLayout w_layout({sz}, dtype::Byte());
        dnn_wk = dnn_opr.create_workspace(w_layout);
    }
    dnn_opr.op->exec(src, dst.as_megdnn(), dnn_wk);
    return {Tensor::make(dst)};
}
// Register the AdaptivePooling trait with the three handlers defined in this
// namespace (graph lowering, output inference, eager execution); fallback() is
// invoked last on the registration chain.
OP_TRAIT_REG(AdaptivePooling, AdaptivePooling)
.apply_on_var_node(apply_on_var_node)
.infer_output_attrs_fallible(infer_output_attrs_fallible)
.apply_on_physical_tensor(apply_on_physical_tensor)
.fallback();
} // namespace adaptive_pooling
} // namespace
} // namespace mgb::imperative
......@@ -293,20 +293,6 @@ OP_TRAIT_REG(TopK, TopK).apply_on_var_node(apply_on_var_node).fallback();
} // namespace top_k
} // namespace
namespace {
namespace adaptive_pooling {
// Graph lowering for AdaptivePooling: forwards inputs[0] (source) and
// inputs[1] (target shape) together with the op's param to
// opr::AdaptivePooling::make. Both inputs are read unconditionally.
auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
auto&& pool = static_cast<const AdaptivePooling&>(def);
OperatorNodeConfig config{pool.make_name()};
return opr::AdaptivePooling::make(inputs[0], inputs[1], pool.param(), config);
}
// Only the graph lowering is registered here; fallback() is invoked last on
// the registration chain.
OP_TRAIT_REG(AdaptivePooling, AdaptivePooling)
.apply_on_var_node(apply_on_var_node)
.fallback();
} // namespace adaptive_pooling
} // namespace
namespace {
namespace batch_conv_bias {
auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
......
......@@ -69,7 +69,11 @@ def GroupLocal: MgbHashableOp<"GroupLocal", [ConvolutionParam]>;
def Pooling: MgbHashableOp<"Pooling", [PoolingParam, ExecutionPolicyParamBase<"policy">]>;
def AdaptivePooling : MgbHashableOp<"AdaptivePooling", [AdaptivePoolingParam]>;
// AdaptivePooling op: besides the fields of AdaptivePoolingParam it carries an
// extra `shape` attribute, an array of 32-bit integers.
def AdaptivePooling : MgbHashableOp<"AdaptivePooling", [AdaptivePoolingParam]> {
let extraArguments = (ins
MgbArrayAttr<MgbI32Attr>:$shape
);
}
def ROIPooling: MgbHashableOp<"ROIPooling", [ROIPoolingParam]>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册