diff --git a/imperative/python/test/unit/functional/test_tensor.py b/imperative/python/test/unit/functional/test_tensor.py index e6f56819d82c30ff9b9016ce73db09e3b4274ec7..e3423a492660dcaf320fcac46ff2b1c1042dcd8d 100644 --- a/imperative/python/test/unit/functional/test_tensor.py +++ b/imperative/python/test/unit/functional/test_tensor.py @@ -711,6 +711,35 @@ def test_copy_d2d(is_varnode): copy_test("gpu0:0", "gpu0:1", network=network) +@pytest.mark.require_ngpu(2) +@pytest.mark.parametrize( + "shape, device_src, device_dst", + [ + ((0,), "cpu0", "cpu0"), + ((10, 0), "cpu0", "cpu1"), + ((2, 0, 3), "cpu0", "gpu0"), + ((1, 0, 1, 0), "gpu0", "cpu0"), + ((2, 3, 4, 5, 0), "gpu0", "gpu1"), + ], +) +@pytest.mark.parametrize("is_symbolic", [None, True, False]) +def test_copy_empty(shape, device_src, device_dst, is_symbolic): + inp = tensor(np.random.randn(*shape).astype("float32"), device=device_src) + + def func(inp): + return F.copy(inp, device_dst) + + if is_symbolic is not None: + func = trace(symbolic=is_symbolic)(func) + + for _ in range(3): + out = func(inp) + assert out.numpy().shape == shape + assert out.device == device_dst + if is_symbolic is None: + break + + @pytest.mark.parametrize( "shape, repeats, axis", [ diff --git a/imperative/src/impl/opr_utility.cpp b/imperative/src/impl/opr_utility.cpp index 7238f58c4929e280a23fc7e71c1644728fa48a8f..2cbd1da0563a3b45473d550d28703cc05c4b651b 100644 --- a/imperative/src/impl/opr_utility.cpp +++ b/imperative/src/impl/opr_utility.cpp @@ -170,6 +170,7 @@ void OutputCallback::init_output_static_infer_desc() {} cg::OperatorNodeBase::NodeProp* OutputCallback::do_make_node_prop() const { NodeProp* prop = Super::do_make_node_prop(); prop->add_flag(NodeProp::Flag::NO_AUTOMATIC_DUP); + prop->add_flag(NodeProp::Flag::CROSS_COMP_NODE_MEMORY); SmallVector<NodeProp::DepType> dep_types(input().size(), NodeProp::DepType::DEV_COMP_ORDER); using IT = cg::static_infer::InferType; diff --git a/src/opr/impl/io.cpp b/src/opr/impl/io.cpp index
4a691213d924c11b2f039040be8d609c491919f7..2a6c59f1b9f55c6c9b641988ef34b05ecfd169db 100644 --- a/src/opr/impl/io.cpp +++ b/src/opr/impl/io.cpp @@ -708,7 +708,7 @@ Copy::Copy(VarNode *inp, const OperatorNodeConfig &config): Super{inp->owner_graph(), config, "copy", {inp}} { add_input({inp}); - add_output(None); + add_output(None)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE); } SymbolVar Copy::make(SymbolVar inp, const OperatorNodeConfig &config) { @@ -767,6 +767,8 @@ Copy::NodeProp* Copy::do_make_node_prop() const { using F = NodeProp::Flag; rst->add_flag(F::CROSS_COMP_NODE_MEMORY); rst->add_flag(F::NO_AUTOMATIC_DUP); + rst->add_dep_type_existing_var(input(0), + NodeProp::DepType::VALUE_ALLOW_EMPTY); return rst; } diff --git a/src/opr/impl/utility.cpp b/src/opr/impl/utility.cpp index 2aa82fd2bfbf77474b4dcf9df3212be4f75a2f4e..b2413486f7b38d6191e06a1534ce970542a8c701 100644 --- a/src/opr/impl/utility.cpp +++ b/src/opr/impl/utility.cpp @@ -423,7 +423,7 @@ Identity::Identity(VarNode* input, const OperatorNodeConfig &config): Super(input->owner_graph(), config, "identity", {input}) { add_input({input}); - add_output(None); + add_output(None)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE); set_ignore_side_effect(); } @@ -437,6 +437,13 @@ SymbolVar Identity::make( return input.insert_single_output_opr(input.node(), config); } +Identity::NodeProp* Identity::do_make_node_prop() const { + auto ret = Super::do_make_node_prop(); + ret->add_dep_type_existing_var(input(0), + NodeProp::DepType::VALUE_ALLOW_EMPTY); + return ret; +} + #if MGB_ENABLE_GRAD MGB_IMPL_OPR_GRAD(Identity) { return out_grad.at(0); diff --git a/src/opr/include/megbrain/opr/utility.h b/src/opr/include/megbrain/opr/utility.h index 8f3cf6b80d2209b5d9adbdf2b413b20b11896521..7112e4f3e056c2e08dcb8e774272054a4e2a30e6 100644 --- a/src/opr/include/megbrain/opr/utility.h +++ b/src/opr/include/megbrain/opr/utility.h @@ -277,6 +277,7 @@ MGB_DEFINE_OPR_CLASS(MarkNoBroadcastElemwise, intl::ForwardInputToOutput) // { * its 
gradient can be correctly computed. */ MGB_DEFINE_OPR_CLASS(Identity, intl::ForwardInputToOutput) // { + NodeProp* do_make_node_prop() const override; public: using Param = megdnn::param::Empty; Identity(VarNode* input, const OperatorNodeConfig &config); diff --git a/src/opr/test/io.cpp b/src/opr/test/io.cpp index 61782789805c29ae8e0359e38d7e0516c8860719..d85439c7a2ac2cb5dea14a720521dd6974f51c4d 100644 --- a/src/opr/test/io.cpp +++ b/src/opr/test/io.cpp @@ -406,6 +406,20 @@ TEST(TestOprIO, D2DNonContig) { MGB_ASSERT_TENSOR_EQ(host_y, except_y); } +TEST(TestOprIO, D2DCopyEmpty) { + auto cns = load_multiple_xpus(2); + HostTensorGenerator<> gen; + auto host_x = gen({2,0,3,0,4}, cns[0]); + auto graph = ComputingGraph::make(); + auto x = opr::Host2DeviceCopy::make(*graph, host_x).rename("x"), + y = (opr::Copy::make(x, {cns[1]})).rename("y"); + HostTensorND host_y; + auto func = graph->compile({make_callback_copy(y, host_y)}); + func->execute(); + ASSERT_TRUE(host_y.layout().is_empty()); + ASSERT_EQ(host_y.layout(), host_x->layout()); +} + TEST(TestOprIO, MultipleDeviceTensorHolder) { auto cns = load_multiple_xpus(2); HostTensorGenerator<> gen0;