Commit 60c6d59f authored by Megvii Engine Team

feat(mgb/core): support bias preprocess in conv_bias

GitOrigin-RevId: d2e1e14d414a9dcb9a6816515100b854eedb8a0e
Parent ff8ef9ed
@@ -378,15 +378,15 @@ public:
             _megdnn_workspace workspace) = 0;
     /**
-     * \brief execute weight preprocessing, read weights form filter and write
-     * to preprocessed_filter after preprocessed.
+     * \brief execute weight preprocessing: read weights from filter and bias,
+     * write to preprocessed_filter after preprocessing.
      *
      * \param[in] workspace the temporary workspace needed while exec_preprocess
      * is running; its size is given by get_preprocess_workspace_in_bytes
      */
     virtual void exec_preprocess(const TensorLayout& src_layout,
                                  _megdnn_tensor_in filter,
-                                 const TensorLayout& bias_layout,
+                                 _megdnn_tensor_in bias,
                                  const TensorLayout& z_layout,
                                  const TensorLayout& dst_layout,
                                  PreprocessedFilter* preprocessed_filter,
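With this change, exec_preprocess receives the bias as a device tensor (_megdnn_tensor_in) rather than just its layout, so an algorithm can pre-pack the bias data together with the weights and later exec() calls can skip that work. A minimal standalone mock of the preprocess-then-exec split this API implies; the types and transforms below are hypothetical illustrations, not the MegDNN implementation:

    #include <cstdio>
    #include <vector>

    // Hypothetical mock: exec_preprocess runs once and caches transformed
    // weights and bias; exec then reuses the packed form on every call.
    struct PackedFilter {
        std::vector<float> weights;  // pre-transformed weights
        std::vector<float> bias;     // pre-transformed bias (new in this commit)
    };

    PackedFilter exec_preprocess(const std::vector<float>& filter,
                                 const std::vector<float>& bias) {
        PackedFilter pf;
        for (float w : filter) pf.weights.push_back(w * 0.5f);  // fake transform
        for (float b : bias) pf.bias.push_back(b + 1.0f);       // fake transform
        return pf;
    }

    float exec(const PackedFilter& pf, float src) {
        float acc = 0;
        for (float w : pf.weights) acc += w * src;  // use packed weights...
        for (float b : pf.bias) acc += b;           // ...and packed bias
        return acc;
    }

    int main() {
        auto pf = exec_preprocess({1, 2, 3}, {0.5f});
        std::printf("%f\n", exec(pf, 2.0f));  // exec never re-packs
    }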
@@ -238,11 +238,11 @@ ConvBiasForwardImpl::deduce_preprocessed_filter_layout(
 void ConvBiasForwardImpl::exec_preprocess(
         const TensorLayout& src_layout, _megdnn_tensor_in filter,
-        const TensorLayout& bias_layout, const TensorLayout& z_layout,
+        _megdnn_tensor_in bias, const TensorLayout& z_layout,
         const TensorLayout& dst_layout, PreprocessedFilter* preprocessed_filter,
         _megdnn_workspace workspace) {
     TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout},
-            z{nullptr, z_layout}, bias{nullptr, bias_layout};
+            z{nullptr, z_layout};
     AlgoBase::ExecArgs args(this, src, filter, bias, z, dst, workspace,
                             preprocessed_filter);
     auto algo = get_algorithm(this, src.layout, filter.layout, bias.layout,

@@ -49,7 +49,7 @@ public:
             const TensorLayout&, const TensorLayout&, const TensorLayout&,
             const TensorLayout&, const TensorLayout&) override;
     void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
-                         const TensorLayout&, const TensorLayout&,
+                         _megdnn_tensor_in, const TensorLayout&,
                          const TensorLayout&, PreprocessedFilter*,
                          _megdnn_workspace) override;
     const char* get_algorithm_set_name() const override;

@@ -178,15 +178,14 @@ void ConvBiasImpl::exec(_megdnn_tensor_in src, _megdnn_tensor_in filter,
 void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout,
                                    _megdnn_tensor_in filter,
-                                   const TensorLayout& bias_layout,
+                                   _megdnn_tensor_in bias,
                                    const TensorLayout& z_layout,
                                    const TensorLayout& dst_layout,
                                    PreprocessedFilter* preprocessed_filter,
                                    _megdnn_workspace workspace) {
-    //! exec_preprocess currently only support preprocess weights before exec,
-    //! src/dst/bias/z will be ignored, just set to nullptr
-    TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout},
-            bias{nullptr, bias_layout};
+    //! exec_preprocess currently only supports preprocessing weights and bias
+    //! before exec; src/dst/z are ignored and just set to nullptr
+    TensorND src{nullptr, src_layout}, dst{nullptr, dst_layout};
     auto fparam = make_ncb_kern_param(src, filter, bias, dst, workspace,
                                       preprocessed_filter);
     //! must not exceed the workspace_size limit, otherwise no matching algo can be found
@@ -196,7 +195,7 @@ void ConvBiasImpl::exec_preprocess(const TensorLayout& src_layout,
         exec_preprocess_with_ncb_kern(fparam, algo);
     } else {
         naive::ConvBiasForwardImpl::exec_preprocess(
-                src_layout, filter, bias_layout, z_layout, dst_layout,
+                src_layout, filter, bias, z_layout, dst_layout,
                 preprocessed_filter, workspace);
     }
 }
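The nullptr TensorNDs above are deliberate: preprocessing reads only the filter and bias data, while src/dst/z contribute nothing but their layouts, so layout-only placeholders suffice. A self-contained sketch of that idiom with hypothetical stand-in types (TensorLike is not the MegDNN TensorND):

    #include <cassert>
    #include <cstddef>

    // Stand-in for a tensor handle: data pointer plus a simplified layout.
    // A null raw_ptr marks a layout-only placeholder that must never be read.
    struct TensorLike {
        const float* raw_ptr;
        size_t nr_elems;
    };

    // Only filter and bias data are touched; src participates via its layout
    // alone, so a null-data placeholder for it is safe by contract.
    float pack_checksum(const TensorLike& filter, const TensorLike& bias,
                        const TensorLike& src_placeholder) {
        assert(src_placeholder.raw_ptr == nullptr);  // layout-only by contract
        float s = 0;
        for (size_t i = 0; i < filter.nr_elems; ++i)
            s += filter.raw_ptr[i];
        for (size_t i = 0; i < bias.nr_elems; ++i)
            s += bias.raw_ptr[i];
        return s + static_cast<float>(src_placeholder.nr_elems);
    }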
@@ -57,7 +57,7 @@ public:
     void exec_preprocess(const TensorLayout& src_layout,
                          _megdnn_tensor_in filter,
-                         const TensorLayout& bias_layout,
+                         _megdnn_tensor_in bias,
                          const TensorLayout& z_layout,
                          const TensorLayout& dst_layout,
                          PreprocessedFilter* preprocessed_filter,

@@ -59,7 +59,7 @@ public:
     }
     void exec_preprocess(const TensorLayout&, _megdnn_tensor_in,
-                         const TensorLayout&, const TensorLayout&,
+                         _megdnn_tensor_in, const TensorLayout&,
                          const TensorLayout&, PreprocessedFilter*,
                          _megdnn_workspace) override {}

@@ -601,7 +601,7 @@ struct OprWeightPreprocessProxy<ConvBiasForward>
                 tensors[3].layout, tensors[4].layout);
         WorkspaceWrapper preprocess_workspace(opr->handle(),
                                               preprocess_workspace_size);
-        opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2].layout,
+        opr->exec_preprocess(tensors[0].layout, tensors[1], tensors[2],
                             tensors[3].layout, tensors[4].layout,
                             &preprocessed_filter,
                             preprocess_workspace.workspace());

@@ -1955,6 +1955,39 @@ typename DnnOp::Algorithm* try_find_any_weight_preprocess_algo(
     return nullptr;
 }
+template <typename DnnOp, typename... Args>
+typename DnnOp::Algorithm* try_find_any_bias_preprocess_algo(
+        DnnOp* dnn_op, const char* mgb_info, Maybe<bool>& found,
+        Args&&... args) {
+    //! return the memoized result of a previous search
+    if (found.valid()) {
+        if (found.val()) {
+            return dnn_op->execution_policy().algorithm;
+        } else {
+            return nullptr;
+        }
+    }
+    for (auto&& algo : dnn_op->get_all_algorithms(
+            std::forward<Args>(args)...)) {
+        dnn_op->execution_policy().algorithm = algo;
+        auto layouts = dnn_op->deduce_preprocessed_filter_layout(
+                std::forward<Args>(args)...);
+        if (layouts.size() <= 1)
+            continue;
+        //! layouts[1] is the preprocessed bias; a non-empty layout means this
+        //! algo really preprocesses the bias
+        if (!layouts[1].is_empty()) {
+            found.emplace(true);
+            return algo;
+        }
+    }
+    found.emplace(false);
+    mgb_log_warn("Can't find bias preprocess algo for op %s", mgb_info);
+    return nullptr;
+}
 void test_free_memory_in_weight_preprocess(int record_level, CompNode cn) {
     HostTensorGenerator<> gen;
     auto graph = ComputingGraph::make();
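The Maybe<bool>& found parameter memoizes the verdict across chooser invocations, so the full algorithm enumeration runs at most once per operator; "not searched", "found", and "not found" are three distinct states. A self-contained sketch of the same three-state memoization using std::optional (names hypothetical, not MegBrain API):

    #include <optional>
    #include <vector>

    // First call scans all candidates; later calls return the cached verdict.
    // An empty optional means "not searched yet"; a cached nullptr means
    // "searched and found nothing".
    const int* find_first_even(const std::vector<int>& candidates,
                               std::optional<const int*>& cache) {
        if (cache.has_value())
            return cache.value();      // memoized: skip the scan
        for (const int& c : candidates) {
            if (c % 2 == 0) {
                cache.emplace(&c);     // remember the hit
                return &c;
            }
        }
        cache.emplace(nullptr);        // remember the miss, too
        return nullptr;
    }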
@@ -2152,4 +2185,54 @@ TEST(TestGraph, FreeMemoryInWeightPreprocessWithMultiReader) {
             .empty());
 }

+TEST(TestGraph, FreeBias) {
+    HostTensorGenerator<> gen;
+    auto graph = ComputingGraph::make();
+    auto cn = CompNode::load("xpu0");
+    graph->options().graph_opt.weight_preprocess = true;
+    auto mkvar = [&](const char* name, const TensorShape& shp) {
+        return opr::Host2DeviceCopy::make(*graph, gen(shp, cn)).rename(name);
+    };
+    auto mkcvar = [&](const char* name, const TensorShape& shp) {
+        return opr::SharedDeviceTensor::make_const(*graph, *gen(shp, cn))
+                .rename(name);
+    };
+    auto x = mkvar("x", {1, 32, 16, 16});
+    // test ConvBias with a dense convolution
+    opr::ConvBias::Param param_conv_bias;
+    param_conv_bias.pad_h = param_conv_bias.pad_w = 0;
+    param_conv_bias.sparse = opr::ConvBias::Param::Sparse::DENSE;
+    auto w1 = mkcvar("w1", {32, 32, 1, 1}), b1 = mkcvar("b1", {1, 32, 1, 1});
+    auto conv1 = opr::ConvBias::make(x, w1, b1, param_conv_bias);
+    Maybe<bool> wp1;
+    conv1.node()->owner_opr()->cast_final_safe<opr::ConvBias>()
+            .setup_algo_chooser([&](const cg::OperatorNodeBase* opr) {
+                return try_find_any_bias_preprocess_algo(
+                        opr->cast_final_safe<opr::ConvBias>().megdnn_opr(),
+                        opr->cname(), wp1,
+                        opr->input(0)->layout(), opr->input(1)->layout(),
+                        opr->input(2)->layout(), TensorLayout{},
+                        opr->output(0)->layout());
+            });
+    HostTensorND host_y;
+    auto func = graph->compile({make_callback_copy(conv1, host_y)});
+    //! the first run flags vars whose memory is no longer needed
+    func->execute();
+    //! the second run actually frees the flagged memory
+    func->execute();
+    auto check = [&](SymbolVar v) {
+        ASSERT_TRUE(v.node()->contain_flag(VarNode::Flag::MEMORY_NO_NEED));
+        ASSERT_TRUE(v.node()->dev_tensor().empty());
+        ASSERT_TRUE(v.node()->owner_opr()
+                            ->cast_final_safe<opr::SharedDeviceTensor>()
+                            .get_dev_tensor()
+                            .empty());
+    };
+    ASSERT_TRUE(wp1.valid());
+    if (wp1.val()) {
+        check(b1);
+    }
+}
 // vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
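The two execute() calls in the test are deliberate: the first run only sets the MEMORY_NO_NEED flag, and the backing storage is released on the following run. A standalone sketch of that flag-then-free idiom under hypothetical names (this is not the MegBrain memory manager):

    #include <cassert>
    #include <vector>

    // Pass 1 marks buffers as no longer needed; pass 2 frees them. A buffer
    // is therefore never freed in the same pass that still reads it.
    struct Buffer {
        std::vector<float> storage;
        bool no_need = false;
    };

    void run_pass(std::vector<Buffer>& bufs, bool first_pass) {
        for (auto& b : bufs) {
            if (first_pass) {
                // ... consume b.storage, conclude it is never read again ...
                b.no_need = true;
            } else if (b.no_need) {
                std::vector<float>().swap(b.storage);  // release the memory
            }
        }
    }

    int main() {
        std::vector<Buffer> bufs(2);
        bufs[0].storage.resize(1024);
        run_pass(bufs, /*first_pass=*/true);   // flag only
        run_pass(bufs, /*first_pass=*/false);  // free flagged buffers
        assert(bufs[0].storage.empty());
    }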
@@ -961,17 +961,37 @@ void ConvBiasForward::scn_do_execute_preprocess() {
         z_layout = input(3)->layout();
     }
     megdnn_opr()->exec_preprocess(
-            input(0)->layout(), input(1)->dev_tensor().as_megdnn(), bias_layout,
-            z_layout, output(0)->layout(), preprocessed_filter(),
+            input(0)->layout(), input(1)->dev_tensor().as_megdnn(),
+            input(2)->dev_tensor().as_megdnn(), z_layout, output(0)->layout(),
+            preprocessed_filter(),
             intl::get_megdnn_workspace_from_var(output().back()));
-    //! Flag the input(1) no use later, which can be freed when no other
+    //! Flag the weight and bias as unused later; they can be freed when no other
     //! var depends on their dev_value, host_value and shape.
-    auto receiver_info =
+    auto receiver_info_weight =
             input(1)->owner_graph()->var_receiver_in_current_comp_seq(input(1));
-    if (receiver_info.dev_value == 1 && receiver_info.host_value == 0 &&
-        receiver_info.shape == 0) {
+    if (receiver_info_weight.dev_value == 1 &&
+        receiver_info_weight.host_value == 0 &&
+        receiver_info_weight.shape == 0) {
         input(1)->add_flag(VarNode::Flag::MEMORY_NO_NEED);
     }
+    //! check whether this algo preprocesses the bias as well
+    if (input().size() > 3) {
+        auto preprocessed_layouts =
+                megdnn_opr()->deduce_preprocessed_filter_layout(
+                        input(0)->layout(), input(1)->layout(), bias_layout,
+                        z_layout, output(0)->layout());
+        if (preprocessed_layouts.size() > 1 &&
+            !preprocessed_layouts[1].is_empty()) {
+            auto receiver_info_bias =
+                    input(2)->owner_graph()->var_receiver_in_current_comp_seq(
+                            input(2));
+            if (receiver_info_bias.dev_value == 1 &&
+                receiver_info_bias.host_value == 0 &&
+                receiver_info_bias.shape == 0) {
+                input(2)->add_flag(VarNode::Flag::MEMORY_NO_NEED);
+            }
+        }
+    }
 }
 /* ===================== LocalShareForward ==================== */
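The receiver-info condition encodes "this preprocess pass is the only remaining consumer": exactly one reader of the device value and none of the host value or shape, so the raw weight or bias can be dropped once its preprocessed copy exists. A standalone sketch of that criterion; the struct mirrors the fields used above but is hypothetical:

    // Readers of a variable, grouped by the kind of access they need.
    struct ReceiverInfo {
        int dev_value;   // consumers of the on-device tensor
        int host_value;  // consumers of a host-side copy
        int shape;       // consumers of the shape only
    };

    // Freeing is safe only if the preprocess op is the sole device-value
    // reader and nothing else needs the host value or the shape.
    bool can_flag_memory_no_need(const ReceiverInfo& info) {
        return info.dev_value == 1 && info.host_value == 0 && info.shape == 0;
    }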
@@ -178,9 +178,26 @@ typename TimedProfiler<Opr>::TResult TimedProfiler<Opr>::prof_impl(
             for (size_t i = 0; i < flt_val.size(); i++) {
                 pf.tensors[i] = flt_val[i].as_megdnn();
             }
-            APPLY(_(megdnn_opr)->exec_preprocess(args..., &pf, mdn_workspace),
-                  std::forward_as_tuple(layouts[0], inp_val[1].as_megdnn()),
-                  array_skip<2>(layouts));
+            if_constexpr<opr_contain_bias<Opr>()>(
+                    //! ConvBias: the bias tensor takes part in preprocessing
+                    [&](auto __) {
+                        APPLY(__(megdnn_opr)
+                                      ->exec_preprocess(args..., &pf,
+                                                        mdn_workspace),
+                              std::forward_as_tuple(layouts[0],
+                                                    inp_val[1].as_megdnn(),
+                                                    inp_val[2].as_megdnn()),
+                              array_skip<arity_in - 1>(layouts));
+                    },
+                    //! Convolution: weights only
+                    [&](auto __) {
+                        APPLY(__(megdnn_opr)
+                                      ->exec_preprocess(args..., &pf,
+                                                        mdn_workspace),
+                              std::forward_as_tuple(layouts[0],
+                                                    inp_val[1].as_megdnn()),
+                              array_skip<2>(layouts));
+                    });
         }
     });

@@ -75,6 +75,11 @@ constexpr bool opr_supports_preprocess() {
            std::is_same<Opr, megdnn::ConvBias>::value;
 }
+template <typename Opr>
+constexpr bool opr_contain_bias() {
+    return std::is_same<Opr, megdnn::ConvBias>::value;
+}
 template <typename Opr, bool has_prep>
 struct PreprocessFilterImpl {
     using T = union {};
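if_constexpr here is MegBrain's pre-C++17 helper for branching at compile time on a trait such as opr_contain_bias<Opr>(): only the selected lambda is instantiated, so the bias-specific call never has to type-check for bias-less operators. With C++17 the same dispatch can be written directly; a minimal standalone sketch with hypothetical stand-in types:

    #include <iostream>
    #include <type_traits>

    struct ConvBias {};      // stand-ins for the megdnn operator types
    struct Convolution {};

    template <typename Opr>
    constexpr bool opr_contain_bias() {
        return std::is_same<Opr, ConvBias>::value;
    }

    // Compile-time dispatch: the untaken branch is discarded, mirroring the
    // two-lambda if_constexpr call in the profiler above.
    template <typename Opr>
    void exec_preprocess_dispatch() {
        if constexpr (opr_contain_bias<Opr>()) {
            std::cout << "preprocess weights and bias\n";
        } else {
            std::cout << "preprocess weights only\n";
        }
    }

    int main() {
        exec_preprocess_dispatch<ConvBias>();     // weights and bias
        exec_preprocess_dispatch<Convolution>();  // weights only
    }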