提交 7e9aa742 编写于 作者: M Megvii Engine Team

feat(imperative/amp): enable auto_convert_format by default

GitOrigin-RevId: 71ae311fed63b33642574056041068dd904be723
上级 fc0f4546
...@@ -1017,10 +1017,14 @@ void init_tensor(py::module m) { ...@@ -1017,10 +1017,14 @@ void init_tensor(py::module m) {
using namespace std::placeholders; using namespace std::placeholders;
self.compiled = std::make_shared<CompiledTransformation>( self.compiled = std::make_shared<CompiledTransformation>(
*self.trace_result, self.record_input_shapes); *self.trace_result, self.record_input_shapes);
self.compiled->set_value_comparator( self.compiled->set_value_comparator(
std::bind(&Trace::compare_value, this, _1, _2)); std::bind(&Trace::compare_value, this, _1, _2));
self.options_visitor(py::cast(&self.compiled->options())); self.options_visitor(py::cast(&self.compiled->options()));
self.compiled->compile(); try {
self.compiled->compile();
} catch (const std::exception& e) {
mgb_log_error(e.what());
}
} }
// register transformations // register transformations
if (self.compiled) { if (self.compiled) {
......
...@@ -5,6 +5,7 @@ import megengine as mge ...@@ -5,6 +5,7 @@ import megengine as mge
import megengine.functional as F import megengine.functional as F
from megengine import tensor from megengine import tensor
from megengine.autodiff import GradManager from megengine.autodiff import GradManager
from megengine.jit import trace
def test_basic(): def test_basic():
...@@ -30,26 +31,29 @@ def test_basic(): ...@@ -30,26 +31,29 @@ def test_basic():
assert b.format == "nchw" assert b.format == "nchw"
def _compare_nchw_nhwc(data, func): def _compare_nchw_nhwc(data, func, is_symbolic=None):
x1 = tensor(data, format="nchw") x1 = tensor(data, format="nchw")
x2 = tensor(data.transpose(0, 2, 3, 1), format="nhwc") x2 = tensor(data.transpose(0, 2, 3, 1), format="nhwc")
if is_symbolic is not None:
func = trace(func, symbolic=is_symbolic)
out1 = func(x1) out1 = func(x1)
with mge.config._override(auto_format_convert=True): out2 = func(x2)
out2 = func(x2)
np.testing.assert_almost_equal(out1, out2, decimal=5) np.testing.assert_almost_equal(out1, out2, decimal=5)
def test_dimshuffle(): @pytest.mark.parametrize("is_symbolic", [None])
def test_dimshuffle(is_symbolic):
def func(x): def func(x):
out = F.transpose(x, [2, 3, 0, 1]) out = F.transpose(x, [2, 3, 0, 1])
assert out.format == "default" assert out.format == "default"
return out.numpy() return out.numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
def test_reshape(): @pytest.mark.parametrize("is_symbolic", [None])
def test_reshape(is_symbolic):
# maintain NHWC format # maintain NHWC format
def func(x): def func(x):
out = F.reshape(x, (1, 2, 6, 2)) out = F.reshape(x, (1, 2, 6, 2))
...@@ -58,7 +62,7 @@ def test_reshape(): ...@@ -58,7 +62,7 @@ def test_reshape():
return out.numpy() return out.numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
# not maintain NHWC format # not maintain NHWC format
def func2(x): def func2(x):
...@@ -66,18 +70,20 @@ def test_reshape(): ...@@ -66,18 +70,20 @@ def test_reshape():
assert out.format == "default" assert out.format == "default"
return out.numpy() return out.numpy()
_compare_nchw_nhwc(data, func2) _compare_nchw_nhwc(data, func2, is_symbolic)
def test_flatten(): @pytest.mark.parametrize("is_symbolic", [None])
def test_flatten(is_symbolic):
def func(x): def func(x):
return F.flatten(x).numpy() return F.flatten(x).numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
def test_broadcast(): @pytest.mark.parametrize("is_symbolic", [None])
def test_broadcast(is_symbolic):
# maintain NHWC format # maintain NHWC format
def func(x): def func(x):
out = F.broadcast_to(x, (4, 3, 2, 3)) out = F.broadcast_to(x, (4, 3, 2, 3))
...@@ -86,7 +92,7 @@ def test_broadcast(): ...@@ -86,7 +92,7 @@ def test_broadcast():
return out.numpy() return out.numpy()
data = np.arange(0, 24).reshape((4, 3, 2, 1)) data = np.arange(0, 24).reshape((4, 3, 2, 1))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
# not maintain NHWC format # not maintain NHWC format
def func2(x): def func2(x):
...@@ -94,30 +100,32 @@ def test_broadcast(): ...@@ -94,30 +100,32 @@ def test_broadcast():
assert out.format == "default" assert out.format == "default"
return out.numpy() return out.numpy()
_compare_nchw_nhwc(data, func2) _compare_nchw_nhwc(data, func2, is_symbolic)
@pytest.mark.skip("repeat cannot maintain format yet") @pytest.mark.skip("repeat cannot maintain format yet")
def test_repeat(): @pytest.mark.parametrize("is_symbolic", [None])
def test_repeat(is_symbolic):
def func(x): def func(x):
rst = F.repeat(x, 3, axis=1) rst = F.repeat(x, 3, axis=1)
assert rst.format == x.format assert rst.format == x.format
return rst.numpy() return rst.numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
def test_getshape(): @pytest.mark.parametrize("is_symbolic", [None])
def test_getshape(is_symbolic):
def func(x): def func(x):
return x.shape return x.shape
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
@pytest.mark.skip("symbolic shape is not supported yet") @pytest.mark.skip("symbolic shape is not supported yet")
def test_get_symbolic_shape(): def test_get_symbolic_shape(is_symbolic):
from megengine.core._trace_option import set_symbolic_shape from megengine.core._trace_option import set_symbolic_shape
origin_opt = set_symbolic_shape(True) origin_opt = set_symbolic_shape(True)
...@@ -126,77 +134,84 @@ def test_get_symbolic_shape(): ...@@ -126,77 +134,84 @@ def test_get_symbolic_shape():
return x.shape.numpy() return x.shape.numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
set_symbolic_shape(origin_opt) set_symbolic_shape(origin_opt)
def test_getvalue(): @pytest.mark.parametrize("is_symbolic", [None])
def test_getvalue(is_symbolic):
def func(x): def func(x):
return x.numpy() return x.numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
def test_get_set_subtensor(): @pytest.mark.parametrize("is_symbolic", [None])
def test_get_set_subtensor(is_symbolic):
def get_subtensor(x): def get_subtensor(x):
return x[:, :1, :2, :3].numpy() return x[:, :1, :2, :3].numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, get_subtensor) _compare_nchw_nhwc(data, get_subtensor, is_symbolic)
def set_subtensor(x): def set_subtensor(x):
x[:, :1, :2, :3] = 0 x[:, :1, :2, :3] = 0
return x.numpy() return x.numpy()
_compare_nchw_nhwc(data, set_subtensor) _compare_nchw_nhwc(data, set_subtensor, is_symbolic)
def test_get_set_advanced_indexing(): @pytest.mark.parametrize("is_symbolic", [None])
def test_get_set_advanced_indexing(is_symbolic):
def get_advanced_indexing(x): def get_advanced_indexing(x):
x = x[:, : mge.tensor(2), : mge.tensor(2), [1, 2]].numpy() x = x[:, : mge.tensor(2), : mge.tensor(2), [1, 2]].numpy()
return x return x
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, get_advanced_indexing) _compare_nchw_nhwc(data, get_advanced_indexing, is_symbolic)
def set_advanced_indexing(x): def set_advanced_indexing(x):
x[:, : mge.tensor(2), : mge.tensor([2]), [1,]] = 0 x[:, : mge.tensor(2), : mge.tensor([2]), [1,]] = 0
return x.numpy() return x.numpy()
_compare_nchw_nhwc(data, set_advanced_indexing) _compare_nchw_nhwc(data, set_advanced_indexing, is_symbolic)
def test_typecvt(): @pytest.mark.parametrize("is_symbolic", [None])
def test_typecvt(is_symbolic):
def typecvt(x): def typecvt(x):
return x.astype("float16").numpy() return x.astype("float16").numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, typecvt) _compare_nchw_nhwc(data, typecvt, is_symbolic)
def test_elemwise(): @pytest.mark.parametrize("is_symbolic", [None])
def test_elemwise(is_symbolic):
def elemwise(x): def elemwise(x):
return (x * 2 + x / 2).numpy() return (x * 2 + x / 2).numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, elemwise) _compare_nchw_nhwc(data, elemwise, is_symbolic)
def test_concat(): @pytest.mark.parametrize("is_symbolic", [None])
def test_concat(is_symbolic):
def func(x): def func(x):
rst = F.concat([x / 2, x * 2], axis=1) rst = F.concat([x / 2, x * 2], axis=1)
assert rst.format == x.format assert rst.format == x.format
return rst.numpy() return rst.numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"mode", ["bilinear", "nearest"], "mode", ["bilinear", "nearest"],
) )
def test_interpolate(mode): @pytest.mark.parametrize("is_symbolic", [None])
def test_interpolate(mode, is_symbolic):
def func(x): def func(x):
if x.format == "nhwc": if x.format == "nhwc":
with mge.config._override(conv_format="NHWC"): with mge.config._override(conv_format="NHWC"):
...@@ -208,10 +223,11 @@ def test_interpolate(mode): ...@@ -208,10 +223,11 @@ def test_interpolate(mode):
# NHWC interpolate only supports 1 or 3 channels # NHWC interpolate only supports 1 or 3 channels
data = np.arange(0, 48).reshape((1, 3, 4, 4)).astype("float32") data = np.arange(0, 48).reshape((1, 3, 4, 4)).astype("float32")
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
def test_conv2d(): @pytest.mark.parametrize("is_symbolic", [None])
def test_conv2d(is_symbolic):
def conv2d(x): def conv2d(x):
if x.format == "nhwc": if x.format == "nhwc":
with mge.config._override(conv_format="NHWC"): with mge.config._override(conv_format="NHWC"):
...@@ -226,10 +242,11 @@ def test_conv2d(): ...@@ -226,10 +242,11 @@ def test_conv2d():
return F.conv2d(x, F.ones((3, 2, 1, 1)), F.ones((1, 3, 1, 1))).numpy() return F.conv2d(x, F.ones((3, 2, 1, 1)), F.ones((1, 3, 1, 1))).numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, conv2d) _compare_nchw_nhwc(data, conv2d, is_symbolic)
def test_group_conv2d(): @pytest.mark.parametrize("is_symbolic", [None])
def test_group_conv2d(is_symbolic):
def conv2d(x): def conv2d(x):
if x.format == "nhwc": if x.format == "nhwc":
with mge.config._override(conv_format="NHWC"): with mge.config._override(conv_format="NHWC"):
...@@ -247,10 +264,11 @@ def test_group_conv2d(): ...@@ -247,10 +264,11 @@ def test_group_conv2d():
).numpy() ).numpy()
data = np.arange(0, 48).reshape((1, 4, 3, 4)) data = np.arange(0, 48).reshape((1, 4, 3, 4))
_compare_nchw_nhwc(data, conv2d) _compare_nchw_nhwc(data, conv2d, is_symbolic)
def test_bn(): @pytest.mark.parametrize("is_symbolic", [None])
def test_bn(is_symbolic):
def func(x): def func(x):
if x.format == "nhwc": if x.format == "nhwc":
with mge.config._override(bn_format="dim_111c"): with mge.config._override(bn_format="dim_111c"):
...@@ -279,14 +297,15 @@ def test_bn(): ...@@ -279,14 +297,15 @@ def test_bn():
)[0].numpy() )[0].numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
@pytest.mark.parametrize( @pytest.mark.parametrize(
"pooling", "pooling",
[F.max_pool2d, F.avg_pool2d, F.adaptive_avg_pool2d, F.adaptive_max_pool2d], [F.max_pool2d, F.avg_pool2d, F.adaptive_avg_pool2d, F.adaptive_max_pool2d],
) )
def test_pooling2d(pooling): @pytest.mark.parametrize("is_symbolic", [None])
def test_pooling2d(pooling, is_symbolic):
def func(x): def func(x):
if x.format == "nhwc": if x.format == "nhwc":
with mge.config._override(conv_format="NHWC"): with mge.config._override(conv_format="NHWC"):
...@@ -297,18 +316,25 @@ def test_pooling2d(pooling): ...@@ -297,18 +316,25 @@ def test_pooling2d(pooling):
return pooling(x.astype("float32"), 2).numpy() return pooling(x.astype("float32"), 2).numpy()
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
_compare_nchw_nhwc(data, func) _compare_nchw_nhwc(data, func, is_symbolic)
def test_backward(): @pytest.mark.parametrize("is_symbolic", [None])
def test_backward(is_symbolic):
data = np.arange(0, 24).reshape((1, 2, 3, 4)) data = np.arange(0, 24).reshape((1, 2, 3, 4))
x = tensor(data.transpose(0, 2, 3, 1), format="nhwc") x = tensor(data.transpose(0, 2, 3, 1), format="nhwc")
w = mge.tensor(np.ones((3, 1, 1, 2)), format="nhwc") w = mge.tensor(np.ones((3, 1, 1, 2)), format="nhwc")
b = mge.tensor(np.ones((1, 1, 1, 3)), format="nhwc") b = mge.tensor(np.ones((1, 1, 1, 3)), format="nhwc")
gm = GradManager().attach([w, b]) gm = GradManager().attach([w, b])
def func(x, w, b):
return F.conv2d(x, w, b)
with gm: with gm:
with mge.config._override(auto_format_convert=True, conv_format="NHWC"): with mge.config._override(auto_format_convert=True, conv_format="NHWC"):
x = F.conv2d(x, w, b) if is_symbolic is not None:
func = trace(func, symbolic=is_symbolic)
x = func(x, w, b)
# TODO: fix manual conversion to NHWC, commonly used in detection heads # TODO: fix manual conversion to NHWC, commonly used in detection heads
# x = x.transpose(0, 2, 3, 1).reshape(1, 18, 2) # x = x.transpose(0, 2, 3, 1).reshape(1, 18, 2)
gm.backward(x) gm.backward(x)
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
namespace mgb::imperative { namespace mgb::imperative {
class FormattedTensorValue final : public ValueImpl<FormattedTensorValue> { class FormattedTensorValue final : public ObjectValue<FormattedTensorValue> {
private: private:
ValueRef m_value; ValueRef m_value;
Format m_format; Format m_format;
...@@ -26,10 +26,6 @@ public: ...@@ -26,10 +26,6 @@ public:
const Format& format() const { return m_format; } const Format& format() const { return m_format; }
TypedValueRef<FormattedTensorValue> as(const Format::Type& target) const;
TypedValueRef<FormattedTensorValue> to(
const Format::Type& target, const std::string& scope = "") const;
void clear() override { void clear() override {
m_value = {}; m_value = {};
m_format = {}; m_format = {};
...@@ -40,23 +36,18 @@ public: ...@@ -40,23 +36,18 @@ public:
void on_unwatch() override { m_value.unwatch(); } void on_unwatch() override { m_value.unwatch(); }
}; };
/**
* \brief simulates scalars because the megbrain graph system doesn't support scalars
*
* Assume that we have 'a = ScalarValue(b)'; thus 'a.shape == []' and 'b.shape == [1]'.
* This transformation simulates scalars with a flag: if a value is a ScalarValue, it is
* a scalar, and vice versa. So there are no scalars below this layer.
*/
class FormatTransformation final : public Transformation { class FormatTransformation final : public Transformation {
private: private:
bool m_auto_convert = false; // enable auto_convert by default to be easier to use.
bool m_auto_convert = true;
ObjectType<FormattedTensorValue> m_value_type{"FormattedTensorValue"};
public: public:
std::vector<ValueRef> apply_transformation( ValueRefList apply_transformation(
const Operator& op, Span<ValueRef> inputs) override; const Operator& op, Span<ValueRef> inputs) override;
ValueRef unwrap(ValueRef value) override { ValueRef unwrap(ValueRef value) override {
mgb_assert(!value.is<FormattedTensorValue>()); mgb_assert(!value.is(m_value_type));
return value; return value;
} }
...@@ -65,6 +56,22 @@ public: ...@@ -65,6 +56,22 @@ public:
} }
void set_auto_convert(bool enabled) { m_auto_convert = enabled; } void set_auto_convert(bool enabled) { m_auto_convert = enabled; }
bool get_auto_convert() const { return m_auto_convert; } bool get_auto_convert() const { return m_auto_convert; }
const Type<FormattedTensorValue>& value_type() const { return m_value_type; }
inline ValueRef unwrap_input(const ValueRef& input) const;
inline ValueRefList unwrap_inputs(const Span<ValueRef>& inputs) const;
inline ValueRef wrap_output(
const ValueRef& output, Format::Type type = Format::Type::DEFAULT) const;
inline ValueRefList wrap_outputs(
const ValueRefList& outputs,
Format::Type type = Format::Type::DEFAULT) const;
TypedValueRef<FormattedTensorValue> as(
const FormattedTensorValue&, const Format::Type& target) const;
TypedValueRef<FormattedTensorValue> to(
const FormattedTensorValue&, const Format::Type& target,
const std::string& scope = "") const;
}; };
} // namespace mgb::imperative } // namespace mgb::imperative
...@@ -67,6 +67,7 @@ template <typename T> ...@@ -67,6 +67,7 @@ template <typename T>
class Type : public IType { class Type : public IType {
protected: protected:
Type(std::string name) : IType(std::move(name)) {} Type(std::string name) : IType(std::move(name)) {}
Type(IType&& type) : IType(std::move(type)) {}
// TODO: each type owns an allocator // TODO: each type owns an allocator
public: public:
...@@ -104,6 +105,7 @@ template <typename T> ...@@ -104,6 +105,7 @@ template <typename T>
class ObjectType : public Type<T> { class ObjectType : public Type<T> {
public: public:
ObjectType(std::string name) : Type<T>(name) {} ObjectType(std::string name) : Type<T>(name) {}
ObjectType(IType&& type) : Type<T>(std::move(type)) {}
}; };
/** /**
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册