feat(opr): let Copy and Identity support empty IO

GitOrigin-RevId: 4e49d8eae8ba1055c3b639d02b3e0949f82fd235

feat(opr): let Copy and Identity support empty IO
GitOrigin-RevId: 4e49d8eae8ba1055c3b639d02b3e0949f82fd235
50f73877 · Megvii Engine Team · 74cbc10d · 50f73877 · 50f73877 · 50f73877
6 changed files
--- a/imperative/python/test/unit/functional/test_tensor.py
+++ b/imperative/python/test/unit/functional/test_tensor.py
@@ -711,6 +711,35 @@ def test_copy_d2d(is_varnode):
    copy_test("gpu0:0", "gpu0:1", network=network)


+@pytest.mark.require_ngpu(2)
+@pytest.mark.parametrize(
+    "shape, device_src, device_dst",
+    [
+        ((0,), "cpu0", "cpu0"),
+        ((10, 0), "cpu0", "cpu1"),
+        ((2, 0, 3), "cpu0", "gpu0"),
+        ((1, 0, 1, 0), "gpu0", "cpu0"),
+        ((2, 3, 4, 5, 0), "gpu0", "gpu1"),
+    ],
+)
+@pytest.mark.parametrize("is_symbolic", [None, True, False])
+def test_copy_empty(shape, device_src, device_dst, is_symbolic):
+    inp = tensor(np.random.randn(*shape).astype("float32"), device=device_src)
+
+    def func(inp):
+        return F.copy(inp, device_dst)
+
+    if is_symbolic is not None:
+        func = trace(symbolic=is_symbolic)(func)
+
+    for _ in range(3):
+        out = func(inp)
+        assert out.numpy().shape == shape
+        assert out.device == device_dst
+        if is_symbolic is None:
+            break
+
+
 @pytest.mark.parametrize(
    "shape, repeats, axis",
    [

--- a/imperative/src/impl/opr_utility.cpp
+++ b/imperative/src/impl/opr_utility.cpp
@@ -170,6 +170,7 @@ void OutputCallback::init_output_static_infer_desc() {}
 cg::OperatorNodeBase::NodeProp* OutputCallback::do_make_node_prop() const {
    NodeProp* prop = Super::do_make_node_prop();
    prop->add_flag(NodeProp::Flag::NO_AUTOMATIC_DUP);
+    prop->add_flag(NodeProp::Flag::CROSS_COMP_NODE_MEMORY);
    SmallVector<NodeProp::DepType> dep_types(input().size(),
                                             NodeProp::DepType::DEV_COMP_ORDER);
    using IT = cg::static_infer::InferType;

--- a/src/opr/impl/io.cpp
+++ b/src/opr/impl/io.cpp
@@ -708,7 +708,7 @@ Copy::Copy(VarNode *inp, const OperatorNodeConfig &config):
    Super{inp->owner_graph(), config, "copy", {inp}}
 {
    add_input({inp});
-    add_output(None);
+    add_output(None)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
 }

 SymbolVar Copy::make(SymbolVar inp, const OperatorNodeConfig &config) {
@@ -767,6 +767,8 @@ Copy::NodeProp* Copy::do_make_node_prop() const {
    using F = NodeProp::Flag;
    rst->add_flag(F::CROSS_COMP_NODE_MEMORY);
    rst->add_flag(F::NO_AUTOMATIC_DUP);
+    rst->add_dep_type_existing_var(input(0),
+                                   NodeProp::DepType::VALUE_ALLOW_EMPTY);
    return rst;
 }


--- a/src/opr/impl/utility.cpp
+++ b/src/opr/impl/utility.cpp
@@ -423,7 +423,7 @@ Identity::Identity(VarNode* input, const OperatorNodeConfig &config):
    Super(input->owner_graph(), config, "identity", {input})
 {
    add_input({input});
-    add_output(None);
+    add_output(None)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
    set_ignore_side_effect();
 }

@@ -437,6 +437,13 @@ SymbolVar Identity::make(
    return input.insert_single_output_opr<Identity>(input.node(), config);
 }

+Identity::NodeProp* Identity::do_make_node_prop() const {
+    auto ret = Super::do_make_node_prop();
+    ret->add_dep_type_existing_var(input(0),
+                                   NodeProp::DepType::VALUE_ALLOW_EMPTY);
+    return ret;
+}
+
 #if MGB_ENABLE_GRAD
 MGB_IMPL_OPR_GRAD(Identity) {
    return out_grad.at(0);

--- a/src/opr/include/megbrain/opr/utility.h
+++ b/src/opr/include/megbrain/opr/utility.h
@@ -277,6 +277,7 @@ MGB_DEFINE_OPR_CLASS(MarkNoBroadcastElemwise, intl::ForwardInputToOutput) // {
 * its gradient can be correctly computed.
 */
 MGB_DEFINE_OPR_CLASS(Identity, intl::ForwardInputToOutput) // {
+        NodeProp* do_make_node_prop() const override;
    public:
        using Param = megdnn::param::Empty;
        Identity(VarNode* input, const OperatorNodeConfig &config);

--- a/src/opr/test/io.cpp
+++ b/src/opr/test/io.cpp
@@ -406,6 +406,20 @@ TEST(TestOprIO, D2DNonContig) {
    MGB_ASSERT_TENSOR_EQ(host_y, except_y);
 }

+TEST(TestOprIO, D2DCopyEmpty) {
+    auto cns = load_multiple_xpus(2);
+    HostTensorGenerator<> gen;
+    auto host_x = gen({2,0,3,0,4}, cns[0]);
+    auto graph = ComputingGraph::make();
+    auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"),
+         y = (opr::Copy::make(x, {cns[1]})).rename("y");
+    HostTensorND host_y;
+    auto func = graph->compile({make_callback_copy(y, host_y)});
+    func->execute();
+    ASSERT_TRUE(host_y.layout().is_empty());
+    ASSERT_EQ(host_y.layout(), host_x->layout());
+}
+
 TEST(TestOprIO, MultipleDeviceTensorHolder) {
    auto cns = load_multiple_xpus(2);
    HostTensorGenerator<> gen0;