提交 50f73877 编写于 作者: M Megvii Engine Team

feat(opr): let Copy and Identity support empty IO

GitOrigin-RevId: 4e49d8eae8ba1055c3b639d02b3e0949f82fd235
上级 74cbc10d
......@@ -711,6 +711,35 @@ def test_copy_d2d(is_varnode):
copy_test("gpu0:0", "gpu0:1", network=network)
@pytest.mark.require_ngpu(2)
@pytest.mark.parametrize(
    "shape, device_src, device_dst",
    [
        ((0,), "cpu0", "cpu0"),
        ((10, 0), "cpu0", "cpu1"),
        ((2, 0, 3), "cpu0", "gpu0"),
        ((1, 0, 1, 0), "gpu0", "cpu0"),
        ((2, 3, 4, 5, 0), "gpu0", "gpu1"),
    ],
)
@pytest.mark.parametrize("is_symbolic", [None, True, False])
def test_copy_empty(shape, device_src, device_dst, is_symbolic):
    """Copying a tensor with a zero-sized axis must preserve its shape and
    land on the requested destination device, in eager and traced modes."""
    src = tensor(np.random.randn(*shape).astype("float32"), device=device_src)

    def do_copy(x):
        return F.copy(x, device_dst)

    if is_symbolic is not None:
        do_copy = trace(symbolic=is_symbolic)(do_copy)

    # Run repeatedly so a traced function also exercises its cached execution
    # path; a single eager run is enough, so bail out early in that case.
    for _ in range(3):
        result = do_copy(src)
        assert result.numpy().shape == shape
        assert result.device == device_dst
        if is_symbolic is None:
            break
@pytest.mark.parametrize(
"shape, repeats, axis",
[
......
......@@ -170,6 +170,7 @@ void OutputCallback::init_output_static_infer_desc() {}
cg::OperatorNodeBase::NodeProp* OutputCallback::do_make_node_prop() const {
NodeProp* prop = Super::do_make_node_prop();
prop->add_flag(NodeProp::Flag::NO_AUTOMATIC_DUP);
prop->add_flag(NodeProp::Flag::CROSS_COMP_NODE_MEMORY);
SmallVector<NodeProp::DepType> dep_types(input().size(),
NodeProp::DepType::DEV_COMP_ORDER);
using IT = cg::static_infer::InferType;
......
......@@ -708,7 +708,7 @@ Copy::Copy(VarNode *inp, const OperatorNodeConfig &config):
Super{inp->owner_graph(), config, "copy", {inp}}
{
add_input({inp});
add_output(None);
add_output(None)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
}
SymbolVar Copy::make(SymbolVar inp, const OperatorNodeConfig &config) {
......@@ -767,6 +767,8 @@ Copy::NodeProp* Copy::do_make_node_prop() const {
using F = NodeProp::Flag;
rst->add_flag(F::CROSS_COMP_NODE_MEMORY);
rst->add_flag(F::NO_AUTOMATIC_DUP);
rst->add_dep_type_existing_var(input(0),
NodeProp::DepType::VALUE_ALLOW_EMPTY);
return rst;
}
......
......@@ -423,7 +423,7 @@ Identity::Identity(VarNode* input, const OperatorNodeConfig &config):
Super(input->owner_graph(), config, "identity", {input})
{
add_input({input});
add_output(None);
add_output(None)->add_flag(VarNode::Flag::ALLOW_EMPTY_SHAPE);
set_ignore_side_effect();
}
......@@ -437,6 +437,13 @@ SymbolVar Identity::make(
return input.insert_single_output_opr<Identity>(input.node(), config);
}
Identity::NodeProp* Identity::do_make_node_prop() const {
    // Take the default node properties, then relax the dependency on the
    // (single) input so that value deps may carry an empty tensor; this is
    // what lets Identity forward empty IO.
    NodeProp* prop = Super::do_make_node_prop();
    prop->add_dep_type_existing_var(
            input(0), NodeProp::DepType::VALUE_ALLOW_EMPTY);
    return prop;
}
#if MGB_ENABLE_GRAD
MGB_IMPL_OPR_GRAD(Identity) {
return out_grad.at(0);
......
......@@ -277,6 +277,7 @@ MGB_DEFINE_OPR_CLASS(MarkNoBroadcastElemwise, intl::ForwardInputToOutput) // {
* its gradient can be correctly computed.
*/
MGB_DEFINE_OPR_CLASS(Identity, intl::ForwardInputToOutput) // {
NodeProp* do_make_node_prop() const override;
public:
using Param = megdnn::param::Empty;
Identity(VarNode* input, const OperatorNodeConfig &config);
......
......@@ -406,6 +406,20 @@ TEST(TestOprIO, D2DNonContig) {
MGB_ASSERT_TENSOR_EQ(host_y, except_y);
}
TEST(TestOprIO, D2DCopyEmpty) {
    // Copy a tensor whose layout contains zero-sized axes between two compute
    // nodes and verify the empty layout survives the device-to-device copy.
    auto comp_nodes = load_multiple_xpus(2);
    HostTensorGenerator<> gen;
    auto inp_host = gen({2, 0, 3, 0, 4}, comp_nodes[0]);

    auto graph = ComputingGraph::make();
    auto inp = opr::Host2DeviceCopy::make(*graph, inp_host).rename("x");
    auto copied = opr::Copy::make(inp, {comp_nodes[1]}).rename("y");

    HostTensorND out_host;
    auto func = graph->compile({make_callback_copy(copied, out_host)});
    func->execute();

    ASSERT_TRUE(out_host.layout().is_empty());
    ASSERT_EQ(out_host.layout(), inp_host->layout());
}
TEST(TestOprIO, MultipleDeviceTensorHolder) {
auto cns = load_multiple_xpus(2);
HostTensorGenerator<> gen0;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册