From d19fc2c1da920a1a9cf840e28023ff0a21dc60d3 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Mon, 6 Jun 2022 20:44:35 +0800 Subject: [PATCH] fix(imperative): add alloc TensorPtr in imperative GitOrigin-RevId: 1b438fc436cc752f5437b2bd047006f0e0b1b2e5 --- imperative/src/impl/dnn_op_helper.h | 14 +++-- imperative/src/impl/ops/adaptive_pooling.cpp | 13 ++-- imperative/src/impl/ops/batch_norm.cpp | 60 ++++++++----------- imperative/src/impl/ops/cond_take.cpp | 5 +- imperative/src/impl/ops/convolution.cpp | 58 +++++++----------- imperative/src/impl/ops/elemwise.cpp | 13 ++-- imperative/src/impl/ops/indexing.cpp | 14 ++--- imperative/src/impl/ops/lamb.cpp | 24 ++++---- imperative/src/impl/ops/layer_norm.cpp | 29 ++++----- imperative/src/impl/ops/matmul.cpp | 60 ++++++++----------- imperative/src/impl/ops/misc.cpp | 5 +- imperative/src/impl/ops/pooling.cpp | 13 ++-- imperative/src/impl/ops/reduce.cpp | 41 ++++++------- imperative/src/impl/ops/tensor_manip.cpp | 5 +- imperative/src/impl/ops/vision.cpp | 4 +- imperative/src/impl/physical_tensor.cpp | 6 +- .../megbrain/imperative/physical_tensor.h | 19 ++++++ src/opr/impl/basic_arith.cpp | 4 +- src/opr/include/megbrain/opr/basic_arith.h | 2 +- 19 files changed, 177 insertions(+), 212 deletions(-) diff --git a/imperative/src/impl/dnn_op_helper.h b/imperative/src/impl/dnn_op_helper.h index bd2e0c4b4..029207a80 100644 --- a/imperative/src/impl/dnn_op_helper.h +++ b/imperative/src/impl/dnn_op_helper.h @@ -27,10 +27,16 @@ struct DnnOprCaller { return mgb::opr::intl::create_megdnn_opr(cn); } - megdnn::Workspace create_workspace(TensorLayout layout) { - dev_tensor = Tensor::make(layout, cn)->dev_tensor(); - workspace = - megdnn::Workspace(dev_tensor.raw_ptr(), dev_tensor.storage().size()); + Workspace create_workspace(size_t sz) { + if (workspace.raw_ptr) { + mgb_throw(MegBrainError, "workspace should not be applicated many times"); + } + if (sz) { + TensorLayout layout({sz}, dtype::Byte()); + dev_tensor = 
Tensor::make(layout, cn)->dev_tensor(); + workspace = megdnn::Workspace( + dev_tensor.raw_ptr(), dev_tensor.storage().size()); + } return workspace; } diff --git a/imperative/src/impl/ops/adaptive_pooling.cpp b/imperative/src/impl/ops/adaptive_pooling.cpp index 5701b9c55..183f1d68c 100644 --- a/imperative/src/impl/ops/adaptive_pooling.cpp +++ b/imperative/src/impl/ops/adaptive_pooling.cpp @@ -135,21 +135,16 @@ SmallVector apply_on_physical_tensor( param.window_w = IW - (OW - 1) * param.stride_w; TensorND src = inputs[0]->dnn_tensor(); - DeviceTensorND dst = - BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout); + auto dst = Tensor::make(dst_layout, cn); size_t sz = setup_algo( {src_layout, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, ::megdnn::param::ExecutionPolicy{}, false); - megdnn::Workspace dnn_wk; - if (sz) { - TensorLayout w_layout({sz}, dtype::Byte()); - dnn_wk = dnn_opr.create_workspace(w_layout); - } - dnn_opr.op->exec(src, dst.as_megdnn(), dnn_wk); + auto dnn_wk = dnn_opr.create_workspace(sz); + dnn_opr.op->exec(src, dst->dnn_tensor(), dnn_wk); - return {Tensor::make(dst)}; + return {dst}; } OP_TRAIT_REG(AdaptivePooling, AdaptivePooling) diff --git a/imperative/src/impl/ops/batch_norm.cpp b/imperative/src/impl/ops/batch_norm.cpp index b67e81d7b..bc4e798cc 100644 --- a/imperative/src/impl/ops/batch_norm.cpp +++ b/imperative/src/impl/ops/batch_norm.cpp @@ -160,10 +160,8 @@ SmallVector apply_on_physical_tensor( bool empty_input = src_layout.is_empty(); size_t nr_inp = inputs.size(); - DeviceTensorND reserve; size_t sz = 0, rsz = 0; - TensorLayout w_layout({sz}, dtype::Byte()); TensorLayout r_layout({rsz}, dtype::Byte()); if (!empty_input) { @@ -172,79 +170,71 @@ SmallVector apply_on_physical_tensor( src_layout, src_layout, src_layout); rsz = dnn_opr.op->get_reserve_in_bytes(src_layout); - w_layout = TensorLayout({sz}, dtype::Byte()); r_layout = TensorLayout({rsz}, dtype::Byte()); } - auto dnn_wk = dnn_opr.create_workspace(w_layout); - 
reserve = BlobManager::inst()->alloc_workspace_with_defrag(comp_node, r_layout); + auto dnn_wk = dnn_opr.create_workspace(sz); + auto reserve = Tensor::make(r_layout, comp_node); // alloc memory - DeviceTensorND y = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, src_layout); + auto y = Tensor::make(src_layout, comp_node); - DeviceTensorND save_mean = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, scale_layout); - DeviceTensorND save_variance = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, scale_layout); + auto save_mean = Tensor::make(scale_layout, comp_node); + + auto save_variance = Tensor::make(scale_layout, comp_node); if (op_def.fwd_mode == ::megdnn::param::BN::FwdMode::INFERENCE) { if (!empty_input) dnn_opr.op->exec( inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], - inp_tensornds[3], inp_tensornds[4], save_mean.as_megdnn(), - save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(), + inp_tensornds[3], inp_tensornds[4], save_mean->dnn_tensor(), + save_variance->dnn_tensor(), reserve->dnn_tensor(), y->dnn_tensor(), dnn_wk); - return {inputs[3], inputs[4], Tensor::make(reserve), Tensor::make(y)}; + return {inputs[3], inputs[4], reserve, y}; } else { - DeviceTensorND mean, variance; if (nr_inp == 5) { - mean = BlobManager::inst()->alloc_workspace_with_defrag( - comp_node, scale_layout); - variance = BlobManager::inst()->alloc_workspace_with_defrag( - comp_node, scale_layout); + auto mean = Tensor::make(scale_layout, comp_node); + + auto variance = Tensor::make(scale_layout, comp_node); megdnn::RefPtr src_ptr1( inp_tensornds[3].get_ref_ptr().get_ptr(), inputs[3]->offset()); megdnn::RefPtr dst_ptr1( - mean.storage().get_ref_ptr(), mean.storage().offset(), false); + mean->dev_tensor().storage().get_ref_ptr(), + mean->dev_tensor().storage().offset(), false); comp_node.peer_copy_to_ref( comp_node, dst_ptr1, src_ptr1, scale_layout.span().high_byte); megdnn::RefPtr src_ptr2( 
inp_tensornds[4].get_ref_ptr().get_ptr(), inputs[4]->offset()); megdnn::RefPtr dst_ptr2( - variance.storage().get_ref_ptr(), variance.storage().offset(), - false); + variance->dev_tensor().storage().get_ref_ptr(), + variance->dev_tensor().storage().offset(), false); comp_node.peer_copy_to_ref( comp_node, dst_ptr2, src_ptr2, scale_layout.span().high_byte); if (!empty_input) dnn_opr.op->exec( inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], - mean.as_megdnn(), variance.as_megdnn(), save_mean.as_megdnn(), - save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(), - dnn_wk); + mean->dnn_tensor(), variance->dnn_tensor(), + save_mean->dnn_tensor(), save_variance->dnn_tensor(), + reserve->dnn_tensor(), y->dnn_tensor(), dnn_wk); - return {Tensor::make(mean), Tensor::make(variance), - Tensor::make(save_mean), Tensor::make(save_variance), - Tensor::make(reserve), Tensor::make(y)}; + return {mean, variance, save_mean, save_variance, reserve, y}; } TensorLayout m_layout({0}, scale_layout.dtype); - mean = BlobManager::inst()->alloc_workspace_with_defrag(comp_node, m_layout); - variance = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, m_layout); + auto mean = Tensor::make(m_layout, comp_node); + auto variance = Tensor::make(m_layout, comp_node); if (!empty_input) { dnn_opr.op->exec( inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], - mean.as_megdnn(), variance.as_megdnn(), save_mean.as_megdnn(), - save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(), + mean->dnn_tensor(), variance->dnn_tensor(), save_mean->dnn_tensor(), + save_variance->dnn_tensor(), reserve->dnn_tensor(), y->dnn_tensor(), dnn_wk); } - return {Tensor::make(save_mean), Tensor::make(save_variance), - Tensor::make(reserve), Tensor::make(y)}; + return {save_mean, save_variance, reserve, y}; } } diff --git a/imperative/src/impl/ops/cond_take.cpp b/imperative/src/impl/ops/cond_take.cpp index 3495f0cf6..28b44905a 100644 --- a/imperative/src/impl/ops/cond_take.cpp +++ 
b/imperative/src/impl/ops/cond_take.cpp @@ -44,10 +44,9 @@ SmallVector apply_on_physical_tensor( DnnOprCaller dnn_op(inp->comp_node()); dnn_op.op->param().val = 1; - TensorLayout m_layout( - {dnn_op.op->get_workspace_in_bytes(inp->layout())}, dtype::Byte()); + size_t sz = dnn_op.op->get_workspace_in_bytes(inp->layout()); - auto dnn_workspace = dnn_op.create_workspace(m_layout); + auto dnn_workspace = dnn_op.create_workspace(sz); dnn_op.op->exec( inp->dev_tensor().as_megdnn(), msk->dev_tensor().as_megdnn(), diff --git a/imperative/src/impl/ops/convolution.cpp b/imperative/src/impl/ops/convolution.cpp index b079e182c..b7bd452f5 100644 --- a/imperative/src/impl/ops/convolution.cpp +++ b/imperative/src/impl/ops/convolution.cpp @@ -165,11 +165,10 @@ SmallVector apply_on_physical_tensor( TensorLayout empty_shp({0}, inputs[0]->dtype()); empty_shp.ndim = 0; - DeviceTensorND empty_bias = - BlobManager::inst()->alloc_workspace_with_defrag(cn, empty_shp); + auto empty_bias = Tensor::make(empty_shp, cn); - inp_tensornds[2] = empty_bias.as_megdnn(); - inp_tensornds[3] = empty_bias.as_megdnn(); + inp_tensornds[2] = empty_bias->dnn_tensor(); + inp_tensornds[3] = empty_bias->dnn_tensor(); size_t sz = setup_algo( {inp_shapes[0], inp_shapes[1], empty_shp, empty_shp, oup_shapes[0]}, @@ -177,17 +176,15 @@ SmallVector apply_on_physical_tensor( &inp_tensornds); // alloc memory - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); + auto out = Tensor::make(out_layout, cn); - TensorLayout w_layout({sz}, dtype::Byte()); - auto dnn_wk = dnn_opr.create_workspace(w_layout); + auto dnn_wk = dnn_opr.create_workspace(sz); // exeucte dnn_opr.op->exec( - inp_tensornds[0], inp_tensornds[1], empty_bias.as_megdnn(), - empty_bias.as_megdnn(), out.as_megdnn(), nullptr, dnn_wk); - return {Tensor::make(out)}; + inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], inp_tensornds[3], + out->dnn_tensor(), nullptr, dnn_wk); + return {out}; } OP_TRAIT_REG(Convolution, 
Convolution, opr::Convolution) @@ -368,6 +365,8 @@ SmallVector apply_on_physical_tensor( def, inputs[1]->layout().ndim, inputs[0]->layout(), inputs[1]->layout(), cn); + auto out = Tensor::make(out_layout, cn); + using TensorND = megdnn::TensorND; SmallVector inp_tensornds(inputs.size()); TensorLayoutArray inp_shapes(inputs.size()), oup_shapes(output_descs.size()); @@ -383,16 +382,11 @@ SmallVector apply_on_physical_tensor( {inp_shapes[0], inp_shapes[1], oup_shapes[0]}, dnn_opr.op.get(), 0, false, false, cn, convbwd.policy(), false, &inp_tensornds); - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); - - auto wk = Blob::make(cn, sz); - auto ptr = wk->storage().get(); - megdnn::Workspace dnn_wk(ptr, sz); + auto dnn_wk = dnn_opr.create_workspace(sz); // exeucte - dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); - return {Tensor::make(out)}; + dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); + return {out}; } OP_TRAIT_REG(ConvolutionBackwardData, ConvolutionBackwardData) @@ -549,18 +543,13 @@ SmallVector apply_on_physical_tensor( false, cn, conv.policy(), false, &inp_tensornds); // alloc memory - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); + auto out = Tensor::make(out_layout, cn); - megdnn::Workspace dnn_wk; - if (sz != 0) { - TensorLayout w_layout({sz}, dtype::Byte()); - dnn_wk = dnn_opr.create_workspace(w_layout); - } + auto dnn_wk = dnn_opr.create_workspace(sz); // exeucte - dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); - return {Tensor::make(out)}; + dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); + return {out}; } OP_TRAIT_REG(Convolution3D, Convolution3D, opr::Convolution3D) @@ -615,8 +604,7 @@ SmallVector apply_on_physical_tensor( megdnn::Convolution3DBackwardData::deduce_layout_impl( wlayout, dlayout, op_def.param(), oup_layout); } - DeviceTensorND oup 
= - BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout); + auto oup = Tensor::make(oup_layout, cn); SmallVector inp_tensornds(inputs.size()); inp_tensornds[0] = inputs[0]->dnn_tensor(); @@ -624,14 +612,10 @@ SmallVector apply_on_physical_tensor( size_t wk_size = setup_algo( {wlayout, dlayout, oup_layout}, dnn_opr.get(), 0, false, false, cn, op_def.policy(), false, &inp_tensornds); - megdnn::Workspace dnn_wk; - if (wk_size != 0) { - TensorLayout w_layout({wk_size}, dtype::Byte()); - dnn_wk = caller.create_workspace(w_layout); - } + auto dnn_wk = caller.create_workspace(wk_size); - dnn_opr->exec(inp_tensornds[0], inp_tensornds[1], oup.as_megdnn(), dnn_wk); - return {Tensor::make(oup)}; + dnn_opr->exec(inp_tensornds[0], inp_tensornds[1], oup->dnn_tensor(), dnn_wk); + return {oup}; } auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) { diff --git a/imperative/src/impl/ops/elemwise.cpp b/imperative/src/impl/ops/elemwise.cpp index a62d962d1..2394a4c64 100644 --- a/imperative/src/impl/ops/elemwise.cpp +++ b/imperative/src/impl/ops/elemwise.cpp @@ -121,10 +121,10 @@ SmallVector apply_on_physical_tensor( megdnn::Elemwise::deduce_shape(inp_shapes, layout); layout.init_contiguous_stride(); - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout); + auto out = Tensor::make(layout, comp_node); + if (is_empty) { - return {Tensor::make(out)}; + return {out}; } DnnOprCaller dnn_opr(comp_node); @@ -133,12 +133,13 @@ SmallVector apply_on_physical_tensor( dnn_opr.op->param().mode == Mode::FUSE_MUL_ADD4 || (inp_tensornds.size() && inp_tensornds[0].layout.dtype.category() == DTypeCategory::QUANTIZED)) { - opr::Elemwise::perform_dnn(comp_node, out, inp_tensornds, dnn_opr.op); + opr::Elemwise::perform_dnn( + comp_node, out->dnn_tensor(), inp_tensornds, dnn_opr.op); } else { - dnn_opr.op->exec(inp_tensornds, out.as_megdnn()); + dnn_opr.op->exec(inp_tensornds, out->dnn_tensor()); } - return {Tensor::make(out)}; + return 
{out}; } MGB_DEFINE_OPR_CLASS( diff --git a/imperative/src/impl/ops/indexing.cpp b/imperative/src/impl/ops/indexing.cpp index 63ecd1ce0..f31d71dd1 100644 --- a/imperative/src/impl/ops/indexing.cpp +++ b/imperative/src/impl/ops/indexing.cpp @@ -85,10 +85,9 @@ SmallVector apply_on_physical_tensor( TensorPtr out = Tensor::make(tlayout, inp->comp_node()); megdnn::TensorND in = inp->dnn_tensor(); megdnn::TensorND ind = index->dnn_tensor(); - TensorLayout m_layout( - {dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout)}, - dtype::Byte()); - auto dnn_workspace = dnn_op.create_workspace(m_layout); + size_t sz = dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout); + + auto dnn_workspace = dnn_op.create_workspace(sz); dnn_op.op->exec(in, ind, out->dnn_tensor(), dnn_workspace); return {out}; } @@ -152,10 +151,9 @@ SmallVector apply_on_physical_tensor( megdnn::TensorND in = inp->dnn_tensor(); megdnn::TensorND ind = index->dnn_tensor(); megdnn::TensorND su = sub->dnn_tensor(); - TensorLayout m_layout( - {dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout)}, - dtype::Byte()); - auto dnn_workspace = dnn_op.create_workspace(m_layout); + + size_t sz = dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout); + auto dnn_workspace = dnn_op.create_workspace(sz); dnn_op.op->exec(out->dnn_tensor(), ind, su, dnn_workspace); return {out}; } diff --git a/imperative/src/impl/ops/lamb.cpp b/imperative/src/impl/ops/lamb.cpp index 35dc22b6b..598563f17 100644 --- a/imperative/src/impl/ops/lamb.cpp +++ b/imperative/src/impl/ops/lamb.cpp @@ -45,29 +45,25 @@ SmallVector apply_on_physical_tensor( TensorLayout v_t_1_layout{v_t_1->layout()}; TensorLayout lamb_param_layout{lamb_param->layout()}; - DeviceTensorND m_t = BlobManager::inst()->alloc_workspace_with_defrag( - m_t_1->comp_node(), m_t_1_layout); + auto m_t = Tensor::make(m_t_1_layout, m_t_1->comp_node()); - DeviceTensorND v_t = BlobManager::inst()->alloc_workspace_with_defrag( - 
v_t_1->comp_node(), v_t_1_layout); + auto v_t = Tensor::make(v_t_1_layout, v_t_1->comp_node()); - DeviceTensorND new_param = BlobManager::inst()->alloc_workspace_with_defrag( - lamb_param->comp_node(), lamb_param_layout); + auto new_param = Tensor::make(lamb_param_layout, lamb_param->comp_node()); DnnOprCaller caller{lamb_param->comp_node()}; - TensorLayout m_layout( - {caller.op->get_workspace_in_bytes( - m_t_1->layout(), v_t_1->layout(), lamb_param->layout(), - grad->layout(), m_t.layout(), v_t.layout(), new_param.layout())}, - dtype::Byte()); + size_t sz = caller.op->get_workspace_in_bytes( + m_t_1->layout(), v_t_1->layout(), lamb_param->layout(), grad->layout(), + m_t->layout(), v_t->layout(), new_param->layout()); - auto dnn_workspace = caller.create_workspace(m_layout); + auto dnn_workspace = caller.create_workspace(sz); caller.op->param() = op.param(); caller.op->exec( m_t_1->dev_tensor().as_megdnn(), v_t_1->dev_tensor().as_megdnn(), lamb_param->dev_tensor().as_megdnn(), grad->dev_tensor().as_megdnn(), - m_t.as_megdnn(), v_t.as_megdnn(), new_param.as_megdnn(), dnn_workspace); - return {Tensor::make(m_t), Tensor::make(v_t), Tensor::make(new_param)}; + m_t->dnn_tensor(), v_t->dnn_tensor(), new_param->dnn_tensor(), + dnn_workspace); + return {m_t, v_t, new_param}; } OP_TRAIT_REG(LAMBUpdate, LAMBUpdate) diff --git a/imperative/src/impl/ops/layer_norm.cpp b/imperative/src/impl/ops/layer_norm.cpp index 53633c8ef..051d8f063 100644 --- a/imperative/src/impl/ops/layer_norm.cpp +++ b/imperative/src/impl/ops/layer_norm.cpp @@ -77,32 +77,25 @@ SmallVector apply_on_physical_tensor( megdnn::LayerNorm::deduce_layout_fwd_impl( inputs[0]->dnn_tensor().layout, p, oup_layout, mean_layout, rstd_layout); - DeviceTensorND out_devtensor = - BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout); - DeviceTensorND mean_devtensor = - BlobManager::inst()->alloc_workspace_with_defrag(cn, mean_layout); - DeviceTensorND rstd_devtensor = - 
BlobManager::inst()->alloc_workspace_with_defrag(cn, rstd_layout); - - megdnn::Workspace dnn_wk; + auto out = Tensor::make(oup_layout, cn); + + auto mean = Tensor::make(mean_layout, cn); + + auto rstd = Tensor::make(rstd_layout, cn); + auto wk_size = caller.op->get_workspace_in_bytes( inputs[0]->dnn_tensor().layout, p.affine ? inputs[1]->dnn_tensor().layout : TensorLayout(), p.affine ? inputs[2]->dnn_tensor().layout : TensorLayout(), oup_layout, mean_layout, rstd_layout); - if (wk_size != 0) { - TensorLayout w_layout({wk_size}, dtype::Byte()); - dnn_wk = caller.create_workspace(w_layout); - } + auto dnn_wk = caller.create_workspace(wk_size); - dnn_opr->exec( + caller.op->exec( inputs[0]->dnn_tensor(), p.affine ? inputs[1]->dnn_tensor() : megdnn::TensorND(), - p.affine ? inputs[2]->dnn_tensor() : megdnn::TensorND(), - out_devtensor.as_megdnn(), mean_devtensor.as_megdnn(), - rstd_devtensor.as_megdnn(), dnn_wk); - return {Tensor::make(out_devtensor), Tensor::make(mean_devtensor), - Tensor::make(rstd_devtensor)}; + p.affine ? 
inputs[2]->dnn_tensor() : megdnn::TensorND(), out->dnn_tensor(), + mean->dnn_tensor(), rstd->dnn_tensor(), dnn_wk); + return {out, mean, rstd}; } OP_TRAIT_REG(LayerNorm, LayerNorm) diff --git a/imperative/src/impl/ops/matmul.cpp b/imperative/src/impl/ops/matmul.cpp index 1173ac16f..3b1cdb621 100644 --- a/imperative/src/impl/ops/matmul.cpp +++ b/imperative/src/impl/ops/matmul.cpp @@ -185,12 +185,12 @@ SmallVector apply_on_physical_tensor( } if (dim1 == 0 || dim2 == 0 || layout1[layout1.ndim - 1] == 0) { - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(cn, real_dst_layout); - if (!out.empty()) { - dev_tensor_memset(out, 0); + auto out = Tensor::make(real_dst_layout, cn); + + if (!out->empty()) { + dev_tensor_memset(out->dev_tensor(), 0); } - return {Tensor::make(out)}; + return {out}; } TensorLayout layout_a = layout1, layout_b = layout2; @@ -232,13 +232,11 @@ SmallVector apply_on_physical_tensor( size_t sz = setup_algo( {layout_a, layout_b, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, matmul.policy(), false, &inp_tensornds); - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout); - TensorLayout w_layout({sz}, dtype::Byte()); - auto dnn_wk = dnn_opr.create_workspace(w_layout); + auto out = Tensor::make(dst_layout, cn); + auto dnn_wk = dnn_opr.create_workspace(sz); - dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); - return {Tensor::make(out.sub(SubTensorSpec::make_from_layout(real_dst_layout)))}; + dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); + return {out->sub(0, real_dst_layout)}; } SmallVector get_input_layout_constraint( @@ -461,12 +459,12 @@ SmallVector apply_on_physical_tensor( dst_layout.init_contiguous_stride(); if (dim1 == 0 || dim2 == 0 || layout1[layout1.ndim - 1] == 0) { - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout); - if (!out.empty()) { - dev_tensor_memset(out, 0); + auto out = 
Tensor::make(dst_layout, cn); + + if (!out->empty()) { + dev_tensor_memset(out->dev_tensor(), 0); } - return {Tensor::make(out)}; + return {out}; } SmallVector inp_tensornds(2u); @@ -479,19 +477,17 @@ SmallVector apply_on_physical_tensor( {layout1, layout2, dst_layout}, dnn_opr.op.get(), 0, false, false, cn, matmul.policy(), false, &inp_tensornds); - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout); + auto out = Tensor::make(dst_layout, cn); - TensorLayout w_layout({sz}, dtype::Byte()); - auto dnn_wk = dnn_opr.create_workspace(w_layout); - dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); + auto dnn_wk = dnn_opr.create_workspace(sz); + dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); shp1[shp1.ndim - 2] = dst_layout[dst_layout.ndim - 2]; shp1[shp1.ndim - 1] = dst_layout[dst_layout.ndim - 1]; if (maxdim > 3) { dst_layout = dst_layout.reshape(shp1); } - return {Tensor::make(out.sub(SubTensorSpec::make_from_layout(dst_layout)))}; + return {out->sub(0, dst_layout)}; } SmallVector get_input_layout_constraint( @@ -540,27 +536,23 @@ SmallVector apply_on_physical_tensor( dnn_opr.op->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout); if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) { - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); - if (!out.empty()) { - dev_tensor_memset(out, 0); + auto out = Tensor::make(oup_layout, comp_node); + if (!out->empty()) { + dev_tensor_memset(out->dev_tensor(), 0); } - return {Tensor::make(out)}; + return {out}; } auto sz = dnn_opr.op->get_workspace_in_bytes( inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout); - DeviceTensorND out_devtensor = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout); + auto out = Tensor::make(oup_layout, comp_node); - TensorLayout w_layout({sz}, dtype::Byte()); - auto dnn_wk = 
dnn_opr.create_workspace(w_layout); + auto dnn_wk = dnn_opr.create_workspace(sz); - dnn_opr.op->exec( - inp_tensornds[0], inp_tensornds[1], out_devtensor.as_megdnn(), dnn_wk); + dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk); - return {Tensor::make(out_devtensor)}; + return {out}; } std::tuple, bool> infer_output_attrs_fallible( diff --git a/imperative/src/impl/ops/misc.cpp b/imperative/src/impl/ops/misc.cpp index 95b7ea8ad..b72f4eb2b 100644 --- a/imperative/src/impl/ops/misc.cpp +++ b/imperative/src/impl/ops/misc.cpp @@ -36,9 +36,8 @@ SmallVector apply_on_physical_tensor( megdnn::CheckNonFinite::Param param({op.scale}); dnn_opr.op->param() = param; size_t sz = dnn_opr.op->get_workspace_in_bytes(srcs, dest->layout()); - TensorLayout w_layout({sz}, dtype::Byte()); - auto dnn_wk = dnn_opr.create_workspace(w_layout); - dnn_opr.op->exec(srcs, dest->dev_tensor().as_megdnn(), dnn_wk); + auto dnn_wk = dnn_opr.create_workspace(sz); + dnn_opr.op->exec(srcs, dest->dnn_tensor(), dnn_wk); return outputs; } diff --git a/imperative/src/impl/ops/pooling.cpp b/imperative/src/impl/ops/pooling.cpp index 294fccc17..0c8dc25d2 100644 --- a/imperative/src/impl/ops/pooling.cpp +++ b/imperative/src/impl/ops/pooling.cpp @@ -66,17 +66,12 @@ SmallVector apply_on_physical_tensor( {inp_tensornds[0].layout, oup_layout}, dnn_opr.get(), 0, false, false, cn, op_def.policy(), false, &inp_tensornds); - DeviceTensorND out_devtensor = - BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout); + auto out = Tensor::make(oup_layout, cn); - megdnn::Workspace dnn_wk; - if (wk_size) { - TensorLayout w_layout({wk_size}, dtype::Byte()); - dnn_wk = caller.create_workspace(w_layout); - } + auto dnn_wk = caller.create_workspace(wk_size); - dnn_opr->exec(inp_tensornds[0], out_devtensor.as_megdnn(), dnn_wk); - return {Tensor::make(out_devtensor)}; + caller.op->exec(inp_tensornds[0], out->dnn_tensor(), dnn_wk); + return {out}; } OP_TRAIT_REG(Pooling, Pooling) diff --git 
a/imperative/src/impl/ops/reduce.cpp b/imperative/src/impl/ops/reduce.cpp index f50bef0fd..3d4d81431 100644 --- a/imperative/src/impl/ops/reduce.cpp +++ b/imperative/src/impl/ops/reduce.cpp @@ -117,20 +117,20 @@ SmallVector apply_on_physical_tensor( layout.remove_axis_inplace(axis); layout.init_contiguous_stride(); } - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout); + auto out = Tensor::make(layout, comp_node); + std::string err_msg; switch (mode) { case Reduce::Mode::SUM: - if (!out.empty()) { - dev_tensor_memset(out, 0); + if (!out->empty()) { + dev_tensor_memset(out->dev_tensor(), 0); } break; case Reduce::Mode::PRODUCT: - if (!out.empty()) { + if (!out->empty()) { DnnOprCaller fill_op(comp_node); fill_op.op->param() = 1; - fill_op.op->exec(out.as_megdnn(), {}); + fill_op.op->exec(out->dnn_tensor(), {}); } break; case Reduce::Mode::MEAN: @@ -153,34 +153,29 @@ SmallVector apply_on_physical_tensor( MegBrainError, "empty input is not allowed for reduce mode: %s", err_msg.c_str()); } - return {Tensor::make(out)}; + return {out}; } auto dnn_ten = inputs[0]->dnn_tensor(); dnn_ten.layout = src; inp_tensornds.push_back(dnn_ten); - megdnn::Workspace dnn_wk; - auto wk_size = dnn_op.op->get_workspace_in_bytes(src, layout); - if (wk_size) { - TensorLayout w_layout({wk_size}, dtype::Byte()); - dnn_wk = dnn_op.create_workspace(w_layout); - } - - DeviceTensorND out = - BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout); - - dnn_op.op->exec(inp_tensornds[0], out.as_megdnn(), dnn_wk); + auto dnn_wk = dnn_op.create_workspace(wk_size); + TensorLayout ori_layout = layout; if (!keepdim && src.ndim > 1) { - auto out_layout = out.layout(); - out_layout.remove_axis_inplace(axis); - out_layout.init_contiguous_stride(); - out.resize(out_layout); + layout.remove_axis_inplace(axis); + layout.init_contiguous_stride(); } - return {Tensor::make(out)}; + auto out = Tensor::make(layout, comp_node); + auto dnn_out = out->dnn_tensor(); 
+ dnn_out.layout = ori_layout; + + dnn_op.op->exec(inp_tensornds[0], dnn_out, dnn_wk); + + return {out}; } std::tuple, bool> infer_output_attrs_fallible( diff --git a/imperative/src/impl/ops/tensor_manip.cpp b/imperative/src/impl/ops/tensor_manip.cpp index abb3c6a1d..acb0ad462 100644 --- a/imperative/src/impl/ops/tensor_manip.cpp +++ b/imperative/src/impl/ops/tensor_manip.cpp @@ -252,9 +252,8 @@ SmallVector param_pack_concat_apply_on_physical_tensor( HostTensorStorage srcs_storage; srcs_storage.reset(comp_node, srcs_size, srcs_ptr); caller.op->exec( - {srcs_raw_ptr, srcs_layout}, inputs.back()->dev_tensor().as_megdnn(), - output->dev_tensor().as_megdnn(), - caller.create_workspace({{ws_size}, dtype::Byte()})); + {srcs_raw_ptr, srcs_layout}, inputs.back()->dnn_tensor(), + output->dnn_tensor(), caller.create_workspace(ws_size)); async_release(HostTensorND{comp_node, srcs_layout}.storage(srcs_storage)); return {output}; } diff --git a/imperative/src/impl/ops/vision.cpp b/imperative/src/impl/ops/vision.cpp index 5cfb223bd..33dbd0399 100644 --- a/imperative/src/impl/ops/vision.cpp +++ b/imperative/src/impl/ops/vision.cpp @@ -89,8 +89,8 @@ SmallVector apply_on_physical_tensor( size_t sz = dnn_opr.op->get_workspace_in_bytes( inputs[0]->layout(), inputs[1]->layout(), out_layout, ind_layout); - TensorLayout w_layout({sz}, dtype::Byte()); - auto dnn_wk = dnn_opr.create_workspace(w_layout); + + auto dnn_wk = dnn_opr.create_workspace(sz); dnn_opr.op->exec( inputs[0]->dnn_tensor(), inputs[1]->dnn_tensor(), out.as_megdnn(), diff --git a/imperative/src/impl/physical_tensor.cpp b/imperative/src/impl/physical_tensor.cpp index 0785edee1..4a492aac3 100644 --- a/imperative/src/impl/physical_tensor.cpp +++ b/imperative/src/impl/physical_tensor.cpp @@ -566,9 +566,13 @@ DeviceTensorND Tensor::dev_tensor(bool contiguous) { return ret; } +bool Tensor::empty() { + return !m_blob->size(); +} + megdnn::TensorND Tensor::dnn_tensor() { mgb_assert(m_blob, "uninitialized tensor."); - return 
{m_layout, {m_blob->storage().get(), m_offset}}; + return DnnTensorND{m_layout, m_blob->storage(), m_offset}; } void Tensor::fetch_value() { diff --git a/imperative/src/include/megbrain/imperative/physical_tensor.h b/imperative/src/include/megbrain/imperative/physical_tensor.h index 7963ecd77..2085e723e 100644 --- a/imperative/src/include/megbrain/imperative/physical_tensor.h +++ b/imperative/src/include/megbrain/imperative/physical_tensor.h @@ -10,6 +10,7 @@ #include "megbrain/imperative/resource_manager.h" #include "megbrain/tensor.h" #include "megbrain/utils/metahelper.h" +#include "megdnn/basic_types.h" namespace mgb { namespace imperative { @@ -87,6 +88,22 @@ using EventPtr = std::unique_ptr; class Tensor; using TensorPtr = std::shared_ptr; + +/* + DnnTensorND holds a reference to the workspace storage + allocated by BlobManager to prevent its invalidation +*/ +struct DnnTensorND : megdnn::TensorND { +private: + std::shared_ptr m_reference; + +public: + DnnTensorND(TensorLayout& layout_, std::shared_ptr ref_ptr, size_t offset) + : megdnn::TensorND(layout_, {ref_ptr.get(), offset}) { + m_reference = ref_ptr; + } +}; + class Tensor : public NonCopyableObj { public: Tensor() = default; @@ -131,6 +148,8 @@ public: void to_contiguous_inplace(); + bool empty(); + DeviceTensorND dev_tensor(bool contiguous = true); void assign_from_dev_tensor(DeviceTensorND); diff --git a/src/opr/impl/basic_arith.cpp b/src/opr/impl/basic_arith.cpp index 15b87f815..93309027b 100644 --- a/src/opr/impl/basic_arith.cpp +++ b/src/opr/impl/basic_arith.cpp @@ -258,9 +258,9 @@ void Elemwise::perform( } void Elemwise::perform_dnn( - CompNode cn, DeviceTensorND& dest, megdnn::TensorNDArray& inputs, + CompNode cn, const megdnn::TensorND& dest, megdnn::TensorNDArray& inputs, intl::UniqPtrWithCN& opr) { - call_megdnn_opr_exec(cn, inputs, dest.as_megdnn(), opr.get(), nullptr); + call_megdnn_opr_exec(cn, inputs, dest, opr.get(), nullptr); } TensorLayoutArray Elemwise::collective_collapse(const 
TensorLayoutArray& layouts) { diff --git a/src/opr/include/megbrain/opr/basic_arith.h b/src/opr/include/megbrain/opr/basic_arith.h index 47f4391da..bfbf5b4f9 100644 --- a/src/opr/include/megbrain/opr/basic_arith.h +++ b/src/opr/include/megbrain/opr/basic_arith.h @@ -78,7 +78,7 @@ public: intl::UniqPtrWithCN& opr); MGE_WIN_DECLSPEC_FUC static void perform_dnn( - CompNode cn, DeviceTensorND& dest, megdnn::TensorNDArray& inputs, + CompNode cn, const megdnn::TensorND& dest, megdnn::TensorNDArray& inputs, intl::UniqPtrWithCN& opr); using TensorLayoutPtrArray = SmallVector; -- GitLab