Commit d19fc2c1 authored by Megvii Engine Team

fix(imperative): add alloc TensorPtr in imperative

GitOrigin-RevId: 1b438fc436cc752f5437b2bd047006f0e0b1b2e5
Parent d1b6c040
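In short: output tensors that used to be allocated through `BlobManager::inst()->alloc_workspace_with_defrag(...)` as `DeviceTensorND` are now allocated as `TensorPtr` via `Tensor::make(layout, cn)`, `DnnOprCaller::create_workspace` takes a byte count instead of a `TensorLayout`, and `Tensor::dnn_tensor()` returns a `DnnTensorND` that keeps the underlying blob storage alive. A minimal sketch of the resulting call pattern follows; it is not part of the commit, and `MyOpr`, `src`, `src_layout` and `out_layout` are placeholder names:

```cpp
// Sketch only: typical apply_on_physical_tensor body after this change.
// MyOpr, src, src_layout and out_layout are placeholders, not code from this commit.
DnnOprCaller<megdnn::MyOpr> dnn_opr(cn);

// outputs are allocated as TensorPtr instead of DeviceTensorND
auto out = Tensor::make(out_layout, cn);

// workspace is requested by size in bytes; a size of 0 yields an empty workspace
size_t sz = dnn_opr.op->get_workspace_in_bytes(src_layout, out_layout);
auto dnn_wk = dnn_opr.create_workspace(sz);

// dnn_tensor() now returns a DnnTensorND that holds a reference to the blob storage
dnn_opr.op->exec(src, out->dnn_tensor(), dnn_wk);
return {out};
```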
@@ -27,10 +27,16 @@ struct DnnOprCaller {
         return mgb::opr::intl::create_megdnn_opr<Opr>(cn);
     }
-    megdnn::Workspace create_workspace(TensorLayout layout) {
-        dev_tensor = Tensor::make(layout, cn)->dev_tensor();
-        workspace =
-                megdnn::Workspace(dev_tensor.raw_ptr(), dev_tensor.storage().size());
+    Workspace create_workspace(size_t sz) {
+        if (workspace.raw_ptr) {
+            mgb_throw(MegBrainError, "workspace should not be applicated many times");
+        }
+        if (sz) {
+            TensorLayout layout({sz}, dtype::Byte());
+            dev_tensor = Tensor::make(layout, cn)->dev_tensor();
+            workspace = megdnn::Workspace(
+                    dev_tensor.raw_ptr(), dev_tensor.storage().size());
+        }
         return workspace;
     }
......
@@ -135,21 +135,16 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     param.window_w = IW - (OW - 1) * param.stride_w;
     TensorND src = inputs[0]->dnn_tensor();
-    DeviceTensorND dst =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout);
+    auto dst = Tensor::make(dst_layout, cn);
     size_t sz = setup_algo<megdnn::Pooling>(
             {src_layout, dst_layout}, dnn_opr.op.get(), 0, false, false, cn,
             ::megdnn::param::ExecutionPolicy{}, false);
-    megdnn::Workspace dnn_wk;
-    if (sz) {
-        TensorLayout w_layout({sz}, dtype::Byte());
-        dnn_wk = dnn_opr.create_workspace(w_layout);
-    }
-    dnn_opr.op->exec(src, dst.as_megdnn(), dnn_wk);
-    return {Tensor::make(dst)};
+    auto dnn_wk = dnn_opr.create_workspace(sz);
+    dnn_opr.op->exec(src, dst->dnn_tensor(), dnn_wk);
+    return {dst};
 }
 OP_TRAIT_REG(AdaptivePooling, AdaptivePooling)
......
@@ -160,10 +160,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     bool empty_input = src_layout.is_empty();
     size_t nr_inp = inputs.size();
-    DeviceTensorND reserve;
     size_t sz = 0, rsz = 0;
-    TensorLayout w_layout({sz}, dtype::Byte());
     TensorLayout r_layout({rsz}, dtype::Byte());
     if (!empty_input) {
@@ -172,79 +170,71 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
                 src_layout, src_layout, src_layout);
         rsz = dnn_opr.op->get_reserve_in_bytes(src_layout);
-        w_layout = TensorLayout({sz}, dtype::Byte());
         r_layout = TensorLayout({rsz}, dtype::Byte());
     }
-    auto dnn_wk = dnn_opr.create_workspace(w_layout);
-    reserve = BlobManager::inst()->alloc_workspace_with_defrag(comp_node, r_layout);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
+    auto reserve = Tensor::make(r_layout, comp_node);
     // alloc memory
-    DeviceTensorND y =
-            BlobManager::inst()->alloc_workspace_with_defrag(comp_node, src_layout);
-    DeviceTensorND save_mean =
-            BlobManager::inst()->alloc_workspace_with_defrag(comp_node, scale_layout);
-    DeviceTensorND save_variance =
-            BlobManager::inst()->alloc_workspace_with_defrag(comp_node, scale_layout);
+    auto y = Tensor::make(src_layout, comp_node);
+    auto save_mean = Tensor::make(scale_layout, comp_node);
+    auto save_variance = Tensor::make(scale_layout, comp_node);
     if (op_def.fwd_mode == ::megdnn::param::BN::FwdMode::INFERENCE) {
         if (!empty_input)
             dnn_opr.op->exec(
                     inp_tensornds[0], inp_tensornds[1], inp_tensornds[2],
-                    inp_tensornds[3], inp_tensornds[4], save_mean.as_megdnn(),
-                    save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(),
+                    inp_tensornds[3], inp_tensornds[4], save_mean->dnn_tensor(),
+                    save_variance->dnn_tensor(), reserve->dnn_tensor(), y->dnn_tensor(),
                     dnn_wk);
-        return {inputs[3], inputs[4], Tensor::make(reserve), Tensor::make(y)};
+        return {inputs[3], inputs[4], reserve, y};
     } else {
-        DeviceTensorND mean, variance;
         if (nr_inp == 5) {
-            mean = BlobManager::inst()->alloc_workspace_with_defrag(
-                    comp_node, scale_layout);
-            variance = BlobManager::inst()->alloc_workspace_with_defrag(
-                    comp_node, scale_layout);
+            auto mean = Tensor::make(scale_layout, comp_node);
+            auto variance = Tensor::make(scale_layout, comp_node);
             megdnn::RefPtr src_ptr1(
                     inp_tensornds[3].get_ref_ptr().get_ptr(), inputs[3]->offset());
             megdnn::RefPtr dst_ptr1(
-                    mean.storage().get_ref_ptr(), mean.storage().offset(), false);
+                    mean->dev_tensor().storage().get_ref_ptr(),
+                    mean->dev_tensor().storage().offset(), false);
             comp_node.peer_copy_to_ref(
                     comp_node, dst_ptr1, src_ptr1, scale_layout.span().high_byte);
             megdnn::RefPtr src_ptr2(
                     inp_tensornds[4].get_ref_ptr().get_ptr(), inputs[4]->offset());
             megdnn::RefPtr dst_ptr2(
-                    variance.storage().get_ref_ptr(), variance.storage().offset(),
-                    false);
+                    variance->dev_tensor().storage().get_ref_ptr(),
+                    variance->dev_tensor().storage().offset(), false);
             comp_node.peer_copy_to_ref(
                     comp_node, dst_ptr2, src_ptr2, scale_layout.span().high_byte);
             if (!empty_input)
                 dnn_opr.op->exec(
                         inp_tensornds[0], inp_tensornds[1], inp_tensornds[2],
-                        mean.as_megdnn(), variance.as_megdnn(), save_mean.as_megdnn(),
-                        save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(),
-                        dnn_wk);
-            return {Tensor::make(mean), Tensor::make(variance),
-                    Tensor::make(save_mean), Tensor::make(save_variance),
-                    Tensor::make(reserve), Tensor::make(y)};
+                        mean->dnn_tensor(), variance->dnn_tensor(),
+                        save_mean->dnn_tensor(), save_variance->dnn_tensor(),
+                        reserve->dnn_tensor(), y->dnn_tensor(), dnn_wk);
+            return {mean, variance, save_mean, save_variance, reserve, y};
         }
         TensorLayout m_layout({0}, scale_layout.dtype);
-        mean = BlobManager::inst()->alloc_workspace_with_defrag(comp_node, m_layout);
-        variance =
-                BlobManager::inst()->alloc_workspace_with_defrag(comp_node, m_layout);
+        auto mean = Tensor::make(m_layout, comp_node);
+        auto variance = Tensor::make(m_layout, comp_node);
         if (!empty_input) {
             dnn_opr.op->exec(
                     inp_tensornds[0], inp_tensornds[1], inp_tensornds[2],
-                    mean.as_megdnn(), variance.as_megdnn(), save_mean.as_megdnn(),
-                    save_variance.as_megdnn(), reserve.as_megdnn(), y.as_megdnn(),
+                    mean->dnn_tensor(), variance->dnn_tensor(), save_mean->dnn_tensor(),
+                    save_variance->dnn_tensor(), reserve->dnn_tensor(), y->dnn_tensor(),
                     dnn_wk);
         }
-        return {Tensor::make(save_mean), Tensor::make(save_variance),
-                Tensor::make(reserve), Tensor::make(y)};
+        return {save_mean, save_variance, reserve, y};
     }
 }
......
@@ -44,10 +44,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     DnnOprCaller<megdnn::CondTake> dnn_op(inp->comp_node());
     dnn_op.op->param().val = 1;
-    TensorLayout m_layout(
-            {dnn_op.op->get_workspace_in_bytes(inp->layout())}, dtype::Byte());
-    auto dnn_workspace = dnn_op.create_workspace(m_layout);
+    size_t sz = dnn_op.op->get_workspace_in_bytes(inp->layout());
+    auto dnn_workspace = dnn_op.create_workspace(sz);
     dnn_op.op->exec(
             inp->dev_tensor().as_megdnn(), msk->dev_tensor().as_megdnn(),
......
@@ -165,11 +165,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     TensorLayout empty_shp({0}, inputs[0]->dtype());
     empty_shp.ndim = 0;
-    DeviceTensorND empty_bias =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, empty_shp);
-    inp_tensornds[2] = empty_bias.as_megdnn();
-    inp_tensornds[3] = empty_bias.as_megdnn();
+    auto empty_bias = Tensor::make(empty_shp, cn);
+    inp_tensornds[2] = empty_bias->dnn_tensor();
+    inp_tensornds[3] = empty_bias->dnn_tensor();
     size_t sz = setup_algo<megdnn::ConvBiasForward>(
             {inp_shapes[0], inp_shapes[1], empty_shp, empty_shp, oup_shapes[0]},
@@ -177,17 +176,15 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             &inp_tensornds);
     // alloc memory
-    DeviceTensorND out =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout);
-    TensorLayout w_layout({sz}, dtype::Byte());
-    auto dnn_wk = dnn_opr.create_workspace(w_layout);
+    auto out = Tensor::make(out_layout, cn);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
     // exeucte
     dnn_opr.op->exec(
-            inp_tensornds[0], inp_tensornds[1], empty_bias.as_megdnn(),
-            empty_bias.as_megdnn(), out.as_megdnn(), nullptr, dnn_wk);
-    return {Tensor::make(out)};
+            inp_tensornds[0], inp_tensornds[1], inp_tensornds[2], inp_tensornds[3],
+            out->dnn_tensor(), nullptr, dnn_wk);
+    return {out};
 }
 OP_TRAIT_REG(Convolution, Convolution, opr::Convolution)
@@ -368,6 +365,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             def, inputs[1]->layout().ndim, inputs[0]->layout(), inputs[1]->layout(),
             cn);
+    auto out = Tensor::make(out_layout, cn);
+
     using TensorND = megdnn::TensorND;
     SmallVector<TensorND> inp_tensornds(inputs.size());
     TensorLayoutArray inp_shapes(inputs.size()), oup_shapes(output_descs.size());
@@ -383,16 +382,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             {inp_shapes[0], inp_shapes[1], oup_shapes[0]}, dnn_opr.op.get(), 0, false,
             false, cn, convbwd.policy(), false, &inp_tensornds);
-    DeviceTensorND out =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout);
-    auto wk = Blob::make(cn, sz);
-    auto ptr = wk->storage().get();
-    megdnn::Workspace dnn_wk(ptr, sz);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
     // exeucte
-    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk);
-    return {Tensor::make(out)};
+    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk);
+    return {out};
 }
 OP_TRAIT_REG(ConvolutionBackwardData, ConvolutionBackwardData)
@@ -549,18 +543,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             false, cn, conv.policy(), false, &inp_tensornds);
     // alloc memory
-    DeviceTensorND out =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout);
-    megdnn::Workspace dnn_wk;
-    if (sz != 0) {
-        TensorLayout w_layout({sz}, dtype::Byte());
-        dnn_wk = dnn_opr.create_workspace(w_layout);
-    }
+    auto out = Tensor::make(out_layout, cn);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
     // exeucte
-    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk);
-    return {Tensor::make(out)};
+    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk);
+    return {out};
 }
 OP_TRAIT_REG(Convolution3D, Convolution3D, opr::Convolution3D)
@@ -615,8 +604,7 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
         megdnn::Convolution3DBackwardData::deduce_layout_impl(
                 wlayout, dlayout, op_def.param(), oup_layout);
     }
-    DeviceTensorND oup =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout);
+    auto oup = Tensor::make(oup_layout, cn);
     SmallVector<megdnn::TensorND> inp_tensornds(inputs.size());
     inp_tensornds[0] = inputs[0]->dnn_tensor();
@@ -624,14 +612,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     size_t wk_size = setup_algo<megdnn::Convolution3DBackwardData>(
             {wlayout, dlayout, oup_layout}, dnn_opr.get(), 0, false, false, cn,
             op_def.policy(), false, &inp_tensornds);
-    megdnn::Workspace dnn_wk;
-    if (wk_size != 0) {
-        TensorLayout w_layout({wk_size}, dtype::Byte());
-        dnn_wk = caller.create_workspace(w_layout);
-    }
-    dnn_opr->exec(inp_tensornds[0], inp_tensornds[1], oup.as_megdnn(), dnn_wk);
-    return {Tensor::make(oup)};
+    auto dnn_wk = caller.create_workspace(wk_size);
+    dnn_opr->exec(inp_tensornds[0], inp_tensornds[1], oup->dnn_tensor(), dnn_wk);
+    return {oup};
 }
 auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
......
@@ -121,10 +121,10 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     megdnn::Elemwise::deduce_shape(inp_shapes, layout);
     layout.init_contiguous_stride();
-    DeviceTensorND out =
-            BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout);
+    auto out = Tensor::make(layout, comp_node);
     if (is_empty) {
-        return {Tensor::make(out)};
+        return {out};
     }
     DnnOprCaller<megdnn::Elemwise> dnn_opr(comp_node);
@@ -133,12 +133,13 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
         dnn_opr.op->param().mode == Mode::FUSE_MUL_ADD4 ||
         (inp_tensornds.size() &&
          inp_tensornds[0].layout.dtype.category() == DTypeCategory::QUANTIZED)) {
-        opr::Elemwise::perform_dnn(comp_node, out, inp_tensornds, dnn_opr.op);
+        opr::Elemwise::perform_dnn(
+                comp_node, out->dnn_tensor(), inp_tensornds, dnn_opr.op);
     } else {
-        dnn_opr.op->exec(inp_tensornds, out.as_megdnn());
+        dnn_opr.op->exec(inp_tensornds, out->dnn_tensor());
     }
-    return {Tensor::make(out)};
+    return {out};
 }
 MGB_DEFINE_OPR_CLASS(
......
@@ -85,10 +85,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     TensorPtr out = Tensor::make(tlayout, inp->comp_node());
     megdnn::TensorND in = inp->dnn_tensor();
     megdnn::TensorND ind = index->dnn_tensor();
-    TensorLayout m_layout(
-            {dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout)},
-            dtype::Byte());
-    auto dnn_workspace = dnn_op.create_workspace(m_layout);
+    size_t sz = dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout);
+    auto dnn_workspace = dnn_op.create_workspace(sz);
     dnn_op.op->exec(in, ind, out->dnn_tensor(), dnn_workspace);
     return {out};
 }
@@ -152,10 +151,9 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     megdnn::TensorND in = inp->dnn_tensor();
     megdnn::TensorND ind = index->dnn_tensor();
     megdnn::TensorND su = sub->dnn_tensor();
-    TensorLayout m_layout(
-            {dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout)},
-            dtype::Byte());
-    auto dnn_workspace = dnn_op.create_workspace(m_layout);
+    size_t sz = dnn_op.op->get_workspace_in_bytes(layout, index_layout, tlayout);
+    auto dnn_workspace = dnn_op.create_workspace(sz);
     dnn_op.op->exec(out->dnn_tensor(), ind, su, dnn_workspace);
     return {out};
 }
......
@@ -45,29 +45,25 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     TensorLayout v_t_1_layout{v_t_1->layout()};
     TensorLayout lamb_param_layout{lamb_param->layout()};
-    DeviceTensorND m_t = BlobManager::inst()->alloc_workspace_with_defrag(
-            m_t_1->comp_node(), m_t_1_layout);
-    DeviceTensorND v_t = BlobManager::inst()->alloc_workspace_with_defrag(
-            v_t_1->comp_node(), v_t_1_layout);
-    DeviceTensorND new_param = BlobManager::inst()->alloc_workspace_with_defrag(
-            lamb_param->comp_node(), lamb_param_layout);
+    auto m_t = Tensor::make(m_t_1_layout, m_t_1->comp_node());
+    auto v_t = Tensor::make(v_t_1_layout, v_t_1->comp_node());
+    auto new_param = Tensor::make(lamb_param_layout, lamb_param->comp_node());
     DnnOprCaller<megdnn::LAMBUpdate> caller{lamb_param->comp_node()};
-    TensorLayout m_layout(
-            {caller.op->get_workspace_in_bytes(
-                    m_t_1->layout(), v_t_1->layout(), lamb_param->layout(),
-                    grad->layout(), m_t.layout(), v_t.layout(), new_param.layout())},
-            dtype::Byte());
-    auto dnn_workspace = caller.create_workspace(m_layout);
+    size_t sz = caller.op->get_workspace_in_bytes(
+            m_t_1->layout(), v_t_1->layout(), lamb_param->layout(), grad->layout(),
+            m_t->layout(), v_t->layout(), new_param->layout());
+    auto dnn_workspace = caller.create_workspace(sz);
     caller.op->param() = op.param();
     caller.op->exec(
             m_t_1->dev_tensor().as_megdnn(), v_t_1->dev_tensor().as_megdnn(),
             lamb_param->dev_tensor().as_megdnn(), grad->dev_tensor().as_megdnn(),
-            m_t.as_megdnn(), v_t.as_megdnn(), new_param.as_megdnn(), dnn_workspace);
-    return {Tensor::make(m_t), Tensor::make(v_t), Tensor::make(new_param)};
+            m_t->dnn_tensor(), v_t->dnn_tensor(), new_param->dnn_tensor(),
+            dnn_workspace);
+    return {m_t, v_t, new_param};
 }
 OP_TRAIT_REG(LAMBUpdate, LAMBUpdate)
......
@@ -77,32 +77,25 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     megdnn::LayerNorm::deduce_layout_fwd_impl(
             inputs[0]->dnn_tensor().layout, p, oup_layout, mean_layout, rstd_layout);
-    DeviceTensorND out_devtensor =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout);
-    DeviceTensorND mean_devtensor =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, mean_layout);
-    DeviceTensorND rstd_devtensor =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, rstd_layout);
-    megdnn::Workspace dnn_wk;
+    auto out = Tensor::make(oup_layout, cn);
+    auto mean = Tensor::make(mean_layout, cn);
+    auto rstd = Tensor::make(rstd_layout, cn);
     auto wk_size = caller.op->get_workspace_in_bytes(
             inputs[0]->dnn_tensor().layout,
             p.affine ? inputs[1]->dnn_tensor().layout : TensorLayout(),
             p.affine ? inputs[2]->dnn_tensor().layout : TensorLayout(), oup_layout,
             mean_layout, rstd_layout);
-    if (wk_size != 0) {
-        TensorLayout w_layout({wk_size}, dtype::Byte());
-        dnn_wk = caller.create_workspace(w_layout);
-    }
+    auto dnn_wk = caller.create_workspace(wk_size);
-    dnn_opr->exec(
+    caller.op->exec(
             inputs[0]->dnn_tensor(),
             p.affine ? inputs[1]->dnn_tensor() : megdnn::TensorND(),
-            p.affine ? inputs[2]->dnn_tensor() : megdnn::TensorND(),
-            out_devtensor.as_megdnn(), mean_devtensor.as_megdnn(),
-            rstd_devtensor.as_megdnn(), dnn_wk);
-    return {Tensor::make(out_devtensor), Tensor::make(mean_devtensor),
-            Tensor::make(rstd_devtensor)};
+            p.affine ? inputs[2]->dnn_tensor() : megdnn::TensorND(), out->dnn_tensor(),
+            mean->dnn_tensor(), rstd->dnn_tensor(), dnn_wk);
+    return {out, mean, rstd};
 }
 OP_TRAIT_REG(LayerNorm, LayerNorm)
......
@@ -185,12 +185,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     }
     if (dim1 == 0 || dim2 == 0 || layout1[layout1.ndim - 1] == 0) {
-        DeviceTensorND out =
-                BlobManager::inst()->alloc_workspace_with_defrag(cn, real_dst_layout);
-        if (!out.empty()) {
-            dev_tensor_memset(out, 0);
+        auto out = Tensor::make(real_dst_layout, cn);
+        if (!out->empty()) {
+            dev_tensor_memset(out->dev_tensor(), 0);
         }
-        return {Tensor::make(out)};
+        return {out};
     }
     TensorLayout layout_a = layout1, layout_b = layout2;
@@ -232,13 +232,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     size_t sz = setup_algo<megdnn::MatrixMul>(
             {layout_a, layout_b, dst_layout}, dnn_opr.op.get(), 0, false, false, cn,
             matmul.policy(), false, &inp_tensornds);
-    DeviceTensorND out =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout);
-    TensorLayout w_layout({sz}, dtype::Byte());
-    auto dnn_wk = dnn_opr.create_workspace(w_layout);
+    auto out = Tensor::make(dst_layout, cn);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
-    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk);
-    return {Tensor::make(out.sub(SubTensorSpec::make_from_layout(real_dst_layout)))};
+    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk);
+    return {out->sub(0, real_dst_layout)};
 }
 SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint(
@@ -461,12 +459,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     dst_layout.init_contiguous_stride();
     if (dim1 == 0 || dim2 == 0 || layout1[layout1.ndim - 1] == 0) {
-        DeviceTensorND out =
-                BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout);
-        if (!out.empty()) {
-            dev_tensor_memset(out, 0);
+        auto out = Tensor::make(dst_layout, cn);
+        if (!out->empty()) {
+            dev_tensor_memset(out->dev_tensor(), 0);
         }
-        return {Tensor::make(out)};
+        return {out};
     }
     SmallVector<megdnn::TensorND> inp_tensornds(2u);
@@ -479,19 +477,17 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             {layout1, layout2, dst_layout}, dnn_opr.op.get(), 0, false, false, cn,
             matmul.policy(), false, &inp_tensornds);
-    DeviceTensorND out =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, dst_layout);
-    TensorLayout w_layout({sz}, dtype::Byte());
-    auto dnn_wk = dnn_opr.create_workspace(w_layout);
-    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk);
+    auto out = Tensor::make(dst_layout, cn);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
+    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk);
     shp1[shp1.ndim - 2] = dst_layout[dst_layout.ndim - 2];
     shp1[shp1.ndim - 1] = dst_layout[dst_layout.ndim - 1];
     if (maxdim > 3) {
         dst_layout = dst_layout.reshape(shp1);
     }
-    return {Tensor::make(out.sub(SubTensorSpec::make_from_layout(dst_layout)))};
+    return {out->sub(0, dst_layout)};
 }
 SmallVector<VarNode::LayoutConstraintCallback> get_input_layout_constraint(
@@ -540,27 +536,23 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     dnn_opr.op->deduce_layout(inp1_tensor.layout, inp2_tensor.layout, oup_layout);
     if (inputs[0]->layout().is_empty() || inputs[1]->layout().is_empty()) {
-        DeviceTensorND out =
-                BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout);
-        if (!out.empty()) {
-            dev_tensor_memset(out, 0);
+        auto out = Tensor::make(oup_layout, comp_node);
+        if (!out->empty()) {
+            dev_tensor_memset(out->dev_tensor(), 0);
         }
-        return {Tensor::make(out)};
+        return {out};
     }
     auto sz = dnn_opr.op->get_workspace_in_bytes(
             inp_tensornds[0].layout, inp_tensornds[1].layout, output_descs[0].layout);
-    DeviceTensorND out_devtensor =
-            BlobManager::inst()->alloc_workspace_with_defrag(comp_node, oup_layout);
-    TensorLayout w_layout({sz}, dtype::Byte());
-    auto dnn_wk = dnn_opr.create_workspace(w_layout);
+    auto out = Tensor::make(oup_layout, comp_node);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
-    dnn_opr.op->exec(
-            inp_tensornds[0], inp_tensornds[1], out_devtensor.as_megdnn(), dnn_wk);
-    return {Tensor::make(out_devtensor)};
+    dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out->dnn_tensor(), dnn_wk);
+    return {out};
 }
 std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
......
@@ -36,9 +36,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     megdnn::CheckNonFinite::Param param({op.scale});
     dnn_opr.op->param() = param;
     size_t sz = dnn_opr.op->get_workspace_in_bytes(srcs, dest->layout());
-    TensorLayout w_layout({sz}, dtype::Byte());
-    auto dnn_wk = dnn_opr.create_workspace(w_layout);
-    dnn_opr.op->exec(srcs, dest->dev_tensor().as_megdnn(), dnn_wk);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
+    dnn_opr.op->exec(srcs, dest->dnn_tensor(), dnn_wk);
     return outputs;
 }
......
@@ -66,17 +66,12 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             {inp_tensornds[0].layout, oup_layout}, dnn_opr.get(), 0, false, false, cn,
             op_def.policy(), false, &inp_tensornds);
-    DeviceTensorND out_devtensor =
-            BlobManager::inst()->alloc_workspace_with_defrag(cn, oup_layout);
-    megdnn::Workspace dnn_wk;
-    if (wk_size) {
-        TensorLayout w_layout({wk_size}, dtype::Byte());
-        dnn_wk = caller.create_workspace(w_layout);
-    }
-    dnn_opr->exec(inp_tensornds[0], out_devtensor.as_megdnn(), dnn_wk);
-    return {Tensor::make(out_devtensor)};
+    auto out = Tensor::make(oup_layout, cn);
+    auto dnn_wk = caller.create_workspace(wk_size);
+    caller.op->exec(inp_tensornds[0], out->dnn_tensor(), dnn_wk);
+    return {out};
 }
 OP_TRAIT_REG(Pooling, Pooling)
......
@@ -117,20 +117,20 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
             layout.remove_axis_inplace(axis);
             layout.init_contiguous_stride();
         }
-        DeviceTensorND out =
-                BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout);
+        auto out = Tensor::make(layout, comp_node);
         std::string err_msg;
         switch (mode) {
             case Reduce::Mode::SUM:
-                if (!out.empty()) {
-                    dev_tensor_memset(out, 0);
+                if (!out->empty()) {
+                    dev_tensor_memset(out->dev_tensor(), 0);
                 }
                 break;
             case Reduce::Mode::PRODUCT:
-                if (!out.empty()) {
+                if (!out->empty()) {
                     DnnOprCaller<megdnn::Fill> fill_op(comp_node);
                     fill_op.op->param() = 1;
-                    fill_op.op->exec(out.as_megdnn(), {});
+                    fill_op.op->exec(out->dnn_tensor(), {});
                 }
                 break;
             case Reduce::Mode::MEAN:
@@ -153,34 +153,29 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
                     MegBrainError, "empty input is not allowed for reduce mode: %s",
                     err_msg.c_str());
         }
-        return {Tensor::make(out)};
+        return {out};
     }
     auto dnn_ten = inputs[0]->dnn_tensor();
     dnn_ten.layout = src;
     inp_tensornds.push_back(dnn_ten);
-    megdnn::Workspace dnn_wk;
     auto wk_size = dnn_op.op->get_workspace_in_bytes(src, layout);
-    if (wk_size) {
-        TensorLayout w_layout({wk_size}, dtype::Byte());
-        dnn_wk = dnn_op.create_workspace(w_layout);
-    }
-    DeviceTensorND out =
-            BlobManager::inst()->alloc_workspace_with_defrag(comp_node, layout);
-    dnn_op.op->exec(inp_tensornds[0], out.as_megdnn(), dnn_wk);
+    auto dnn_wk = dnn_op.create_workspace(wk_size);
+    TensorLayout ori_layout = layout;
     if (!keepdim && src.ndim > 1) {
-        auto out_layout = out.layout();
-        out_layout.remove_axis_inplace(axis);
-        out_layout.init_contiguous_stride();
-        out.resize(out_layout);
+        layout.remove_axis_inplace(axis);
+        layout.init_contiguous_stride();
     }
-    return {Tensor::make(out)};
+    auto out = Tensor::make(layout, comp_node);
+    auto dnn_out = out->dnn_tensor();
+    dnn_out.layout = ori_layout;
+    dnn_op.op->exec(inp_tensornds[0], dnn_out, dnn_wk);
+    return {out};
 }
 std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
......
@@ -252,9 +252,8 @@ SmallVector<TensorPtr> param_pack_concat_apply_on_physical_tensor(
     HostTensorStorage srcs_storage;
     srcs_storage.reset(comp_node, srcs_size, srcs_ptr);
     caller.op->exec(
-            {srcs_raw_ptr, srcs_layout}, inputs.back()->dev_tensor().as_megdnn(),
-            output->dev_tensor().as_megdnn(),
-            caller.create_workspace({{ws_size}, dtype::Byte()}));
+            {srcs_raw_ptr, srcs_layout}, inputs.back()->dnn_tensor(),
+            output->dnn_tensor(), caller.create_workspace(ws_size));
     async_release(HostTensorND{comp_node, srcs_layout}.storage(srcs_storage));
     return {output};
 }
......
@@ -89,8 +89,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
     size_t sz = dnn_opr.op->get_workspace_in_bytes(
             inputs[0]->layout(), inputs[1]->layout(), out_layout, ind_layout);
-    TensorLayout w_layout({sz}, dtype::Byte());
-    auto dnn_wk = dnn_opr.create_workspace(w_layout);
+    auto dnn_wk = dnn_opr.create_workspace(sz);
     dnn_opr.op->exec(
             inputs[0]->dnn_tensor(), inputs[1]->dnn_tensor(), out.as_megdnn(),
......
@@ -566,9 +566,13 @@ DeviceTensorND Tensor::dev_tensor(bool contiguous) {
     return ret;
 }
+bool Tensor::empty() {
+    return !m_blob->size();
+}
+
 megdnn::TensorND Tensor::dnn_tensor() {
     mgb_assert(m_blob, "uninitialized tensor.");
-    return {m_layout, {m_blob->storage().get(), m_offset}};
+    return DnnTensorND{m_layout, m_blob->storage(), m_offset};
 }
 void Tensor::fetch_value() {
......
@@ -10,6 +10,7 @@
 #include "megbrain/imperative/resource_manager.h"
 #include "megbrain/tensor.h"
 #include "megbrain/utils/metahelper.h"
+#include "megdnn/basic_types.h"
 namespace mgb {
 namespace imperative {
@@ -87,6 +88,22 @@ using EventPtr = std::unique_ptr<CompNode::Event, EventDeleter>;
 class Tensor;
 using TensorPtr = std::shared_ptr<Tensor>;
+/*
+    using DnnTensorND to save the reference count of workspace
+    allocted by blobmanager to prevent invalidation
+*/
+struct DnnTensorND : megdnn::TensorND {
+private:
+    std::shared_ptr<dt_byte> m_reference;
+
+public:
+    DnnTensorND(TensorLayout& layout_, std::shared_ptr<dt_byte> ref_ptr, size_t offset)
+            : megdnn::TensorND(layout_, {ref_ptr.get(), offset}) {
+        m_reference = ref_ptr;
+    }
+};
+
 class Tensor : public NonCopyableObj {
 public:
     Tensor() = default;
@@ -131,6 +148,8 @@ public:
     void to_contiguous_inplace();
+    bool empty();
+
     DeviceTensorND dev_tensor(bool contiguous = true);
     void assign_from_dev_tensor(DeviceTensorND);
......
@@ -258,9 +258,9 @@ void Elemwise::perform(
 }
 void Elemwise::perform_dnn(
-        CompNode cn, DeviceTensorND& dest, megdnn::TensorNDArray& inputs,
+        CompNode cn, const megdnn::TensorND& dest, megdnn::TensorNDArray& inputs,
         intl::UniqPtrWithCN<megdnn::Elemwise>& opr) {
-    call_megdnn_opr_exec(cn, inputs, dest.as_megdnn(), opr.get(), nullptr);
+    call_megdnn_opr_exec(cn, inputs, dest, opr.get(), nullptr);
 }
 TensorLayoutArray Elemwise::collective_collapse(const TensorLayoutArray& layouts) {
......
@@ -78,7 +78,7 @@ public:
             intl::UniqPtrWithCN<megdnn::Elemwise>& opr);
     MGE_WIN_DECLSPEC_FUC static void perform_dnn(
-            CompNode cn, DeviceTensorND& dest, megdnn::TensorNDArray& inputs,
+            CompNode cn, const megdnn::TensorND& dest, megdnn::TensorNDArray& inputs,
             intl::UniqPtrWithCN<megdnn::Elemwise>& opr);
     using TensorLayoutPtrArray = SmallVector<TensorLayout*>;
......