提交 df5ebd3d 编写于 作者: M Megvii Engine Team

fix(imperative/ops): fix the vmemory problem in 1.9

GitOrigin-RevId: 728e9837011d2bb3a39e645da9902721c9b55f68
上级 4e66e0eb
...@@ -194,6 +194,8 @@ def conv1d( ...@@ -194,6 +194,8 @@ def conv1d(
) )
(output,) = apply(op, inp, weight) (output,) = apply(op, inp, weight)
if bias is not None: if bias is not None:
if amp._enabled:
(bias,) = cast_tensors(bias)
output += bias output += bias
return output return output
...@@ -260,6 +262,8 @@ def conv2d( ...@@ -260,6 +262,8 @@ def conv2d(
) )
(output,) = apply(op, inp, weight) (output,) = apply(op, inp, weight)
if bias is not None: if bias is not None:
if amp._enabled:
(bias,) = cast_tensors(bias)
output += bias output += bias
return output return output
......
...@@ -896,13 +896,17 @@ def test_conv3d_zero_stride_numpy_array(): ...@@ -896,13 +896,17 @@ def test_conv3d_zero_stride_numpy_array():
out.numpy() out.numpy()
def test_conv1d(): @pytest.mark.parametrize("bias", [True, False])
def test_conv1d(bias):
inp = tensor(np.ones((2, 2, 4), dtype=np.float32)) inp = tensor(np.ones((2, 2, 4), dtype=np.float32))
weight = tensor(np.ones((3, 2, 2), dtype=np.float32)) weight = tensor(np.ones((3, 2, 2), dtype=np.float32))
out = F.conv1d(inp, weight, None, 2, 0, 1, 1) bias = tensor(np.ones((1, 3, 1), dtype=np.float32)) if bias else None
out = F.conv1d(inp, weight, bias, 2, 0, 1, 1)
np.testing.assert_equal( np.testing.assert_equal(
out.numpy(), out.numpy(),
np.array( np.array([[[5, 5], [5, 5], [5, 5]], [[5, 5], [5, 5], [5, 5]]], dtype=np.float32)
if bias is not None
else np.array(
[[[4, 4], [4, 4], [4, 4]], [[4, 4], [4, 4], [4, 4]]], dtype=np.float32 [[[4, 4], [4, 4], [4, 4]], [[4, 4], [4, 4], [4, 4]]], dtype=np.float32
), ),
) )
...@@ -928,13 +932,15 @@ def test_batchnorm2d_autocast(): ...@@ -928,13 +932,15 @@ def test_batchnorm2d_autocast():
np.testing.assert_allclose(out.numpy(), expected.numpy()) np.testing.assert_allclose(out.numpy(), expected.numpy())
def test_conv3d(): @pytest.mark.parametrize("bias", [True, False])
def test_conv3d(bias):
inp = tensor(np.ones((2, 2, 4, 4, 4), dtype=np.float32)) inp = tensor(np.ones((2, 2, 4, 4, 4), dtype=np.float32))
weight = tensor(np.ones((3, 2, 2, 2, 2), dtype=np.float32)) weight = tensor(np.ones((3, 2, 2, 2, 2), dtype=np.float32))
out = F.conv3d(inp, weight, None, 2, 0, 1, 1) bias = tensor(np.ones((1, 3, 1, 1, 1), dtype=np.float32)) if bias else None
np.testing.assert_equal( out = F.conv3d(inp, weight, bias, 2, 0, 1, 1)
out.numpy(), np.ones((2, 3, 2, 2, 2), dtype=np.float32) * 16 target = np.ones((2, 3, 2, 2, 2), dtype=np.float32) * 16
) target = target + 1 if bias is not None else target
np.testing.assert_equal(out.numpy(), target)
def test_condtake(): def test_condtake():
......
...@@ -117,13 +117,13 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( ...@@ -117,13 +117,13 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
desc.comp_node = inputs[0].comp_node; desc.comp_node = inputs[0].comp_node;
TensorLayout src = inputs[0].layout; TensorLayout src = inputs[0].layout;
TensorLayout filter = inputs[1].layout;
size_t src_ndim = src.ndim; size_t src_ndim = src.ndim;
if (src_ndim == 0) { if (src_ndim == 0 || filter.ndim == 0) {
desc.layout = src; desc.layout = TensorLayout{{}, src.dtype};
return {dests, false}; return {dests, false};
} }
TensorLayout filter = inputs[1].layout;
desc.layout = do_shape_infer(def, src_ndim, src, filter); desc.layout = do_shape_infer(def, src_ndim, src, filter);
return {dests, true}; return {dests, true};
} }
...@@ -165,23 +165,24 @@ SmallVector<TensorPtr> apply_on_physical_tensor( ...@@ -165,23 +165,24 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
param.format = conv.format; param.format = conv.format;
// shape infer // shape infer
TensorLayout shp({0}, inputs[0]->dtype()); TensorLayout empty_shp({0}, inputs[0]->dtype());
shp.ndim = 0; empty_shp.ndim = 0;
size_t sz = setup_algo<megdnn::ConvBiasForward>( size_t sz = setup_algo<megdnn::ConvBiasForward>(
{inp_shapes[0], inp_shapes[1], shp, shp, oup_shapes[0]}, dnn_opr.op.get(), {inp_shapes[0], inp_shapes[1], empty_shp, empty_shp, oup_shapes[0]},
0, false, false, cn, conv.policy(), false); dnn_opr.op.get(), 0, false, false, cn, conv.policy(), false);
// alloc memory // alloc memory
DeviceTensorND bias = BlobManager::inst()->alloc_workspace_with_defrag(cn, shp); DeviceTensorND empty_bias =
BlobManager::inst()->alloc_workspace_with_defrag(cn, empty_shp);
TensorLayout w_layout({sz}, dtype::Byte()); TensorLayout w_layout({sz}, dtype::Byte());
auto dnn_wk = dnn_opr.create_workspace(w_layout); auto dnn_wk = dnn_opr.create_workspace(w_layout);
// execute // execute
dnn_opr.op->exec( dnn_opr.op->exec(
inp_tensornds[0], inp_tensornds[1], bias.as_megdnn(), bias.as_megdnn(), inp_tensornds[0], inp_tensornds[1], empty_bias.as_megdnn(),
out.as_megdnn(), nullptr, dnn_wk); empty_bias.as_megdnn(), out.as_megdnn(), nullptr, dnn_wk);
return {Tensor::make(out)}; return {Tensor::make(out)};
} }
...@@ -333,18 +334,15 @@ TensorLayout convbwd_do_shape_infer( ...@@ -333,18 +334,15 @@ TensorLayout convbwd_do_shape_infer(
std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) { const OpDef& def, const SmallVector<LogicalTensorDesc>& inputs) {
auto&& conv = static_cast<const ConvolutionBackwardData&>(def);
SmallVector<LogicalTensorDesc> dests(1); SmallVector<LogicalTensorDesc> dests(1);
auto&& desc = dests[0]; auto&& desc = dests[0];
desc.comp_node = inputs[0].comp_node; desc.comp_node = inputs[0].comp_node;
TensorLayout filter = inputs[0].layout; TensorLayout filter = inputs[0].layout;
TensorLayout diff = inputs[1].layout; TensorLayout diff = inputs[1].layout;
size_t filter_ndim = filter.ndim;
size_t diff_ndim = diff.ndim; size_t diff_ndim = diff.ndim;
if (diff_ndim == 0) { if (diff_ndim == 0 || filter.ndim == 0) {
desc.layout = diff; desc.layout = TensorLayout{{}, diff.dtype};
return {dests, false}; return {dests, false};
} }
...@@ -506,12 +504,13 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( ...@@ -506,12 +504,13 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
desc.comp_node = inputs[0].comp_node; desc.comp_node = inputs[0].comp_node;
TensorLayout src = inputs[0].layout; TensorLayout src = inputs[0].layout;
TensorLayout filter = inputs[1].layout;
size_t src_ndim = src.ndim; size_t src_ndim = src.ndim;
if (src_ndim == 0) { if (src_ndim == 0 || filter.ndim == 0) {
desc.layout = TensorLayout{{}, src.dtype};
return {dests, false}; return {dests, false};
} }
TensorLayout filter = inputs[1].layout;
desc.layout = do_shape_infer(def, src_ndim, src, filter); desc.layout = do_shape_infer(def, src_ndim, src, filter);
return {dests, true}; return {dests, true};
} }
...@@ -549,8 +548,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor( ...@@ -549,8 +548,11 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
DeviceTensorND out = DeviceTensorND out =
BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout); BlobManager::inst()->alloc_workspace_with_defrag(cn, out_layout);
TensorLayout w_layout({sz}, dtype::Byte()); megdnn::Workspace dnn_wk;
auto dnn_wk = dnn_opr.create_workspace(w_layout); if (sz != 0) {
TensorLayout w_layout({sz}, dtype::Byte());
dnn_wk = dnn_opr.create_workspace(w_layout);
}
// execute // execute
dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk); dnn_opr.op->exec(inp_tensornds[0], inp_tensornds[1], out.as_megdnn(), dnn_wk);
...@@ -581,7 +583,7 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible( ...@@ -581,7 +583,7 @@ std::tuple<SmallVector<LogicalTensorDesc>, bool> infer_output_attrs_fallible(
auto&& diff = inputs[1]; auto&& diff = inputs[1];
auto& cn = weight.comp_node; auto& cn = weight.comp_node;
if (weight.layout.ndim == 0) { if (weight.layout.ndim == 0 || diff.layout.ndim == 0) {
return {{{TensorLayout{weight.layout.dtype}, cn, {}}}, false}; return {{{TensorLayout{weight.layout.dtype}, cn, {}}}, false};
} }
...@@ -616,9 +618,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor( ...@@ -616,9 +618,8 @@ SmallVector<TensorPtr> apply_on_physical_tensor(
cn, op_def.policy(), false); cn, op_def.policy(), false);
megdnn::Workspace dnn_wk; megdnn::Workspace dnn_wk;
if (wk_size != 0) { if (wk_size != 0) {
auto wk = Blob::make(cn, wk_size); TensorLayout w_layout({wk_size}, dtype::Byte());
dnn_wk.raw_ptr = wk->storage().get(); dnn_wk = caller.create_workspace(w_layout);
dnn_wk.size = wk_size;
} }
dnn_opr->exec(weight, diff, oup.as_megdnn(), dnn_wk); dnn_opr->exec(weight, diff, oup.as_megdnn(), dnn_wk);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册