feat(imperative/amp): enable auto_convert_format by default

GitOrigin-RevId: 71ae311fed63b33642574056041068dd904be723

feat(imperative/amp): enable auto_convert_format by default
GitOrigin-RevId: 71ae311fed63b33642574056041068dd904be723
7e9aa742 · Megvii Engine Team · fc0f4546 · 7e9aa742 · 7e9aa742 · 7e9aa742
5 changed file
--- a/imperative/python/src/tensor.cpp
+++ b/imperative/python/src/tensor.cpp
@@ -1017,10 +1017,14 @@ void init_tensor(py::module m) {
                using namespace std::placeholders;
                self.compiled = std::make_shared<CompiledTransformation>(
                        *self.trace_result, self.record_input_shapes);
-                self.compiled->set_value_comparator(
-                        std::bind(&Trace::compare_value, this, _1, _2));
-                self.options_visitor(py::cast(&self.compiled->options()));
-                self.compiled->compile();
+                    self.compiled->set_value_comparator(
+                            std::bind(&Trace::compare_value, this, _1, _2));
+                    self.options_visitor(py::cast(&self.compiled->options()));
+                try {
+                    self.compiled->compile();
+                } catch (const std::exception& e) {
+                    mgb_log_error(e.what());
+                }
            }
            // register transformations
            if (self.compiled) {

--- a/imperative/python/test/unit/core/test_formatted_tensor.py
+++ b/imperative/python/test/unit/core/test_formatted_tensor.py
@@ -5,6 +5,7 @@ import megengine as mge
 import megengine.functional as F
 from megengine import tensor
 from megengine.autodiff import GradManager
+from megengine.jit import trace


 def test_basic():
@@ -30,26 +31,29 @@ def test_basic():
    assert b.format == "nchw"


-def _compare_nchw_nhwc(data, func):
+def _compare_nchw_nhwc(data, func, is_symbolic=None):
    x1 = tensor(data, format="nchw")
    x2 = tensor(data.transpose(0, 2, 3, 1), format="nhwc")
+    if is_symbolic is not None:
+        func = trace(func, symbolic=is_symbolic)
    out1 = func(x1)
-    with mge.config._override(auto_format_convert=True):
-        out2 = func(x2)
+    out2 = func(x2)
    np.testing.assert_almost_equal(out1, out2, decimal=5)


-def test_dimshuffle():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_dimshuffle(is_symbolic):
    def func(x):
        out = F.transpose(x, [2, 3, 0, 1])
        assert out.format == "default"
        return out.numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


-def test_reshape():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_reshape(is_symbolic):
    # maintain NHWC format
    def func(x):
        out = F.reshape(x, (1, 2, 6, 2))
@@ -58,7 +62,7 @@ def test_reshape():
        return out.numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)

    # not maintain NHWC format
    def func2(x):
@@ -66,18 +70,20 @@ def test_reshape():
        assert out.format == "default"
        return out.numpy()

-    _compare_nchw_nhwc(data, func2)
+    _compare_nchw_nhwc(data, func2, is_symbolic)


-def test_flatten():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_flatten(is_symbolic):
    def func(x):
        return F.flatten(x).numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


-def test_broadcast():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_broadcast(is_symbolic):
    # maintain NHWC format
    def func(x):
        out = F.broadcast_to(x, (4, 3, 2, 3))
@@ -86,7 +92,7 @@ def test_broadcast():
        return out.numpy()

    data = np.arange(0, 24).reshape((4, 3, 2, 1))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)

    # not maintain NHWC format
    def func2(x):
@@ -94,30 +100,32 @@ def test_broadcast():
        assert out.format == "default"
        return out.numpy()

-    _compare_nchw_nhwc(data, func2)
+    _compare_nchw_nhwc(data, func2, is_symbolic)


 @pytest.mark.skip("repeat cannot maintain format yet")
-def test_repeat():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_repeat(is_symbolic):
    def func(x):
        rst = F.repeat(x, 3, axis=1)
        assert rst.format == x.format
        return rst.numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


-def test_getshape():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_getshape(is_symbolic):
    def func(x):
        return x.shape

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


 @pytest.mark.skip("symbolic shape is not supported yet")
-def test_get_symbolic_shape():
+def test_get_symbolic_shape(is_symbolic):
    from megengine.core._trace_option import set_symbolic_shape

    origin_opt = set_symbolic_shape(True)
@@ -126,77 +134,84 @@ def test_get_symbolic_shape():
        return x.shape.numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)
    set_symbolic_shape(origin_opt)


-def test_getvalue():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_getvalue(is_symbolic):
    def func(x):
        return x.numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


-def test_get_set_subtensor():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_get_set_subtensor(is_symbolic):
    def get_subtensor(x):
        return x[:, :1, :2, :3].numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, get_subtensor)
+    _compare_nchw_nhwc(data, get_subtensor, is_symbolic)

    def set_subtensor(x):
        x[:, :1, :2, :3] = 0
        return x.numpy()

-    _compare_nchw_nhwc(data, set_subtensor)
+    _compare_nchw_nhwc(data, set_subtensor, is_symbolic)


-def test_get_set_advanced_indexing():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_get_set_advanced_indexing(is_symbolic):
    def get_advanced_indexing(x):
        x = x[:, : mge.tensor(2), : mge.tensor(2), [1, 2]].numpy()
        return x

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, get_advanced_indexing)
+    _compare_nchw_nhwc(data, get_advanced_indexing, is_symbolic)

    def set_advanced_indexing(x):
        x[:, : mge.tensor(2), : mge.tensor([2]), [1,]] = 0
        return x.numpy()

-    _compare_nchw_nhwc(data, set_advanced_indexing)
+    _compare_nchw_nhwc(data, set_advanced_indexing, is_symbolic)


-def test_typecvt():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_typecvt(is_symbolic):
    def typecvt(x):
        return x.astype("float16").numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, typecvt)
+    _compare_nchw_nhwc(data, typecvt, is_symbolic)


-def test_elemwise():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_elemwise(is_symbolic):
    def elemwise(x):
        return (x * 2 + x / 2).numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, elemwise)
+    _compare_nchw_nhwc(data, elemwise, is_symbolic)


-def test_concat():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_concat(is_symbolic):
    def func(x):
        rst = F.concat([x / 2, x * 2], axis=1)
        assert rst.format == x.format
        return rst.numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


 @pytest.mark.parametrize(
    "mode", ["bilinear", "nearest"],
 )
-def test_interpolate(mode):
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_interpolate(mode, is_symbolic):
    def func(x):
        if x.format == "nhwc":
            with mge.config._override(conv_format="NHWC"):
@@ -208,10 +223,11 @@ def test_interpolate(mode):

    # NHWC interpolate only suppoted channel is 1 or 3
    data = np.arange(0, 48).reshape((1, 3, 4, 4)).astype("float32")
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


-def test_conv2d():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_conv2d(is_symbolic):
    def conv2d(x):
        if x.format == "nhwc":
            with mge.config._override(conv_format="NHWC"):
@@ -226,10 +242,11 @@ def test_conv2d():
            return F.conv2d(x, F.ones((3, 2, 1, 1)), F.ones((1, 3, 1, 1))).numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, conv2d)
+    _compare_nchw_nhwc(data, conv2d, is_symbolic)


-def test_group_conv2d():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_group_conv2d(is_symbolic):
    def conv2d(x):
        if x.format == "nhwc":
            with mge.config._override(conv_format="NHWC"):
@@ -247,10 +264,11 @@ def test_group_conv2d():
            ).numpy()

    data = np.arange(0, 48).reshape((1, 4, 3, 4))
-    _compare_nchw_nhwc(data, conv2d)
+    _compare_nchw_nhwc(data, conv2d, is_symbolic)


-def test_bn():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_bn(is_symbolic):
    def func(x):
        if x.format == "nhwc":
            with mge.config._override(bn_format="dim_111c"):
@@ -279,14 +297,15 @@ def test_bn():
            )[0].numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


 @pytest.mark.parametrize(
    "pooling",
    [F.max_pool2d, F.avg_pool2d, F.adaptive_avg_pool2d, F.adaptive_max_pool2d],
 )
-def test_pooling2d(pooling):
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_pooling2d(pooling, is_symbolic):
    def func(x):
        if x.format == "nhwc":
            with mge.config._override(conv_format="NHWC"):
@@ -297,18 +316,25 @@ def test_pooling2d(pooling):
            return pooling(x.astype("float32"), 2).numpy()

    data = np.arange(0, 24).reshape((1, 2, 3, 4))
-    _compare_nchw_nhwc(data, func)
+    _compare_nchw_nhwc(data, func, is_symbolic)


-def test_backward():
+@pytest.mark.parametrize("is_symbolic", [None])
+def test_backward(is_symbolic):
    data = np.arange(0, 24).reshape((1, 2, 3, 4))
    x = tensor(data.transpose(0, 2, 3, 1), format="nhwc")
    w = mge.tensor(np.ones((3, 1, 1, 2)), format="nhwc")
    b = mge.tensor(np.ones((1, 1, 1, 3)), format="nhwc")
    gm = GradManager().attach([w, b])
+
+    def func(x, w, b):
+        return F.conv2d(x, w, b)
+
    with gm:
        with mge.config._override(auto_format_convert=True, conv_format="NHWC"):
-            x = F.conv2d(x, w, b)
+            if is_symbolic is not None:
+                func = trace(func, symbolic=is_symbolic)
+            x = func(x, w, b)
            # TODO: fix manually convert to NHWC, usually used in detection head
            # x = x.transpose(0, 2, 3, 1).reshape(1, 18, 2)
            gm.backward(x)

--- a/imperative/src/impl/transformations/format.cpp
+++ b/imperative/src/impl/transformations/format.cpp
--- a/imperative/src/include/megbrain/imperative/transformations/format.h
+++ b/imperative/src/include/megbrain/imperative/transformations/format.h
@@ -7,7 +7,7 @@

 namespace mgb::imperative {

-class FormattedTensorValue final : public ValueImpl<FormattedTensorValue> {
+class FormattedTensorValue final : public ObjectValue<FormattedTensorValue> {
 private:
    ValueRef m_value;
    Format m_format;
@@ -26,10 +26,6 @@ public:

    const Format& format() const { return m_format; }

-    TypedValueRef<FormattedTensorValue> as(const Format::Type& target) const;
-    TypedValueRef<FormattedTensorValue> to(
-            const Format::Type& target, const std::string& scope = "") const;
-
    void clear() override {
        m_value = {};
        m_format = {};
@@ -40,23 +36,18 @@ public:
    void on_unwatch() override { m_value.unwatch(); }
 };

-/**
- * \brief simulates scalar because megbrain graph system don't support scalar
- *
- * Assume that we has 'a = ScalarValue(b)', thus 'a.shape == []', 'b.shape == [1]'.
- * This transformation simulates scalars with a flag. If a value is ScalarValue, it is
- * scalar, vice versa. So there is not scalar down this layer.
- */
 class FormatTransformation final : public Transformation {
 private:
-    bool m_auto_convert = false;
+    // enable auto_convert by default to be easier to use.
+    bool m_auto_convert = true;
+    ObjectType<FormattedTensorValue> m_value_type{"FormattedTensorValue"};

 public:
-    std::vector<ValueRef> apply_transformation(
+    ValueRefList apply_transformation(
            const Operator& op, Span<ValueRef> inputs) override;

    ValueRef unwrap(ValueRef value) override {
-        mgb_assert(!value.is<FormattedTensorValue>());
+        mgb_assert(!value.is(m_value_type));
        return value;
    }

@@ -65,6 +56,22 @@ public:
    }
    void set_auto_convert(bool enabled) { m_auto_convert = enabled; }
    bool get_auto_convert() const { return m_auto_convert; }
+
+    const Type<FormattedTensorValue>& value_type() const { return m_value_type; }
+
+    inline ValueRef unwrap_input(const ValueRef& input) const;
+    inline ValueRefList unwrap_inputs(const Span<ValueRef>& inputs) const;
+    inline ValueRef wrap_output(
+            const ValueRef& output, Format::Type type = Format::Type::DEFAULT) const;
+    inline ValueRefList wrap_outputs(
+            const ValueRefList& outputs,
+            Format::Type type = Format::Type::DEFAULT) const;
+
+    TypedValueRef<FormattedTensorValue> as(
+            const FormattedTensorValue&, const Format::Type& target) const;
+    TypedValueRef<FormattedTensorValue> to(
+            const FormattedTensorValue&, const Format::Type& target,
+            const std::string& scope = "") const;
 };

 }  // namespace mgb::imperative
--- a/imperative/src/include/megbrain/imperative/value.h
+++ b/imperative/src/include/megbrain/imperative/value.h
@@ -67,6 +67,7 @@ template <typename T>
 class Type : public IType {
 protected:
    Type(std::string name) : IType(std::move(name)) {}
+    Type(IType&& type) : IType(std::move(type)) {}
    // TODO: each type owns an allocator

 public:
@@ -104,6 +105,7 @@ template <typename T>
 class ObjectType : public Type<T> {
 public:
    ObjectType(std::string name) : Type<T>(name) {}
+    ObjectType(IType&& type) : Type<T>(std::move(type)) {}
 };

 /**