提交 f398c8e6 编写于 作者: M Megvii Engine Team 提交者: XindaH

feat(imperative): add magicmind runtime opr

GitOrigin-RevId: 02ddb886d959104c3aa8320e3ca903e71476c814
上级 f06c890e
...@@ -66,3 +66,15 @@ def atlas_runtime_opr(inputs, data): ...@@ -66,3 +66,15 @@ def atlas_runtime_opr(inputs, data):
op = builtin.AtlasRuntime(data, len(data)) op = builtin.AtlasRuntime(data, len(data))
return apply(op, *inputs) return apply(op, *inputs)
def magicmind_runtime_opr(inputs, data):
    r"""Wrap a serialized MagicMind model as a MegEngine runtime operator.

    Args:
        inputs: list of input tensors.
        data: the serialized MagicMind model.

    Returns:
        Whatever ``apply`` yields for the runtime operator on ``inputs``.
    """
    # The builtin op receives both the buffer and its length.
    runtime_op = builtin.MagicMindRuntime(data, len(data))
    outputs = apply(runtime_op, *inputs)
    return outputs
...@@ -130,3 +130,27 @@ class AtlasRuntimeSubgraph(Module): ...@@ -130,3 +130,27 @@ class AtlasRuntimeSubgraph(Module):
def forward(self, *inputs): def forward(self, *inputs):
return atlas_runtime_opr(inputs, data=self._data) return atlas_runtime_opr(inputs, data=self._data)
class MagicMindRuntimeSubgraph(Module):
    r"""Module wrapper around a serialized MagicMindRuntime subgraph.

    See :func:`~.magicmind_runtime_opr` for more details.
    """

    def __init__(self, data, **kwargs):
        super().__init__(**kwargs)
        # Stored exactly as given; only the ``data`` setter below converts the
        # value into a uint8 array.
        self._data = data

    @property
    def data(self):
        # Serialized model currently held by this module.
        return self._data

    @data.setter
    def data(self, val):
        # Reinterpret the incoming buffer as raw bytes (uint8) before storing.
        self._data = np.frombuffer(val, dtype=np.uint8)

    def forward(self, *inputs):
        # Delegate to the functional wrapper with the stored model buffer.
        result = magicmind_runtime_opr(inputs, data=self._data)
        return result
/**
* \file imperative/src/impl/ops/magicmind_runtime.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "../op_trait.h"
#include "megbrain/imperative/ops/autogen.h"
#if MGB_CAMBRICON
#include "megbrain/cambricon/magicmind_runtime_opr.h"
namespace mgb::imperative {
namespace {
namespace magicmind_runtime {
auto apply_on_var_node(const OpDef& def, const VarNodeArray& inputs) {
auto&& op = static_cast<const MagicMindRuntime&>(def);
SymbolVarArray symbol_var_inputs(inputs.begin(), inputs.end());
OperatorNodeConfig config{op.make_name()};
return opr::MagicMindRuntimeOpr::make(
op.buf.c_str(), op.buf_size, symbol_var_inputs, config);
}
// Register the op trait for MagicMindRuntime, installing the
// apply_on_var_node function defined in this namespace as its
// apply_on_var_node handler.
// NOTE(review): .fallback() presumably fills the remaining trait entries with
// default implementations -- confirm against ../op_trait.h.
OP_TRAIT_REG(MagicMindRuntime, MagicMindRuntime)
.apply_on_var_node(apply_on_var_node)
.fallback();
} // namespace magicmind_runtime
} // namespace
} // namespace mgb::imperative
#endif
...@@ -166,8 +166,8 @@ MagicMindRuntimeOpr::MagicMindRuntimeOpr( ...@@ -166,8 +166,8 @@ MagicMindRuntimeOpr::MagicMindRuntimeOpr(
const OperatorNodeConfig& config) const OperatorNodeConfig& config)
: Super(inputs[0]->owner_graph(), config, "magic_runtime", inputs), : Super(inputs[0]->owner_graph(), config, "magic_runtime", inputs),
m_allocator{std::move(allocator)}, m_allocator{std::move(allocator)},
m_context{nullptr},
m_engine{nullptr}, m_engine{nullptr},
m_context{nullptr},
m_model{std::move(model)} { m_model{std::move(model)} {
mgb_assert( mgb_assert(
inputs[0]->comp_node().device_type() == CompNode::DeviceType::CAMBRICON, inputs[0]->comp_node().device_type() == CompNode::DeviceType::CAMBRICON,
...@@ -207,7 +207,7 @@ void MagicMindRuntimeOpr::scn_do_execute() { ...@@ -207,7 +207,7 @@ void MagicMindRuntimeOpr::scn_do_execute() {
cnrt_env.activate(); cnrt_env.activate();
std::vector<IRTTensor*> inputs, outputs; std::vector<IRTTensor*> inputs, outputs;
MM_CHECK(CreateInputTensors(m_context.get(), &inputs)); MM_CHECK(CreateInputTensors(m_context.get(), &inputs));
MM_CHECK(CreateInputTensors(m_context.get(), &outputs)); MM_CHECK(CreateOutputTensors(m_context.get(), &outputs));
size_t nr_inputs = input().size(); size_t nr_inputs = input().size();
mgb_assert(nr_inputs == inputs.size()); mgb_assert(nr_inputs == inputs.size());
for (size_t i = 0; i < nr_inputs; ++i) { for (size_t i = 0; i < nr_inputs; ++i) {
...@@ -234,11 +234,9 @@ void MagicMindRuntimeOpr::scn_do_execute() { ...@@ -234,11 +234,9 @@ void MagicMindRuntimeOpr::scn_do_execute() {
MM_CHECK(m_context->SetWorkspace(output().back()->dev_tensor().raw_ptr(), size)); MM_CHECK(m_context->SetWorkspace(output().back()->dev_tensor().raw_ptr(), size));
MM_CHECK(m_context->Enqueue(inputs, outputs, cnrt_env.queue)); MM_CHECK(m_context->Enqueue(inputs, outputs, cnrt_env.queue));
for (auto&& i : inputs) { for (auto&& i : inputs) {
i->SetData(nullptr);
i->Destroy(); i->Destroy();
} }
for (auto&& o : outputs) { for (auto&& o : outputs) {
o->SetData(nullptr);
o->Destroy(); o->Destroy();
} }
} }
...@@ -260,7 +258,7 @@ void MagicMindRuntimeOpr::get_output_var_shape( ...@@ -260,7 +258,7 @@ void MagicMindRuntimeOpr::get_output_var_shape(
} }
std::vector<IRTTensor*> inputs, outputs; std::vector<IRTTensor*> inputs, outputs;
MM_CHECK(CreateInputTensors(m_context.get(), &inputs)); MM_CHECK(CreateInputTensors(m_context.get(), &inputs));
MM_CHECK(CreateInputTensors(m_context.get(), &outputs)); MM_CHECK(CreateOutputTensors(m_context.get(), &outputs));
size_t nr_inputs = input().size(); size_t nr_inputs = input().size();
mgb_assert(nr_inputs == inputs.size()); mgb_assert(nr_inputs == inputs.size());
for (size_t i = 0; i < nr_inputs; ++i) { for (size_t i = 0; i < nr_inputs; ++i) {
...@@ -295,12 +293,11 @@ void MagicMindRuntimeOpr::get_output_var_shape( ...@@ -295,12 +293,11 @@ void MagicMindRuntimeOpr::get_output_var_shape(
false, "static shape infer for MagicMindRuntimeOpr(%s) failed", false, "static shape infer for MagicMindRuntimeOpr(%s) failed",
cname()); cname());
} }
return;
for (auto&& i : inputs) { for (auto&& i : inputs) {
i->SetData(nullptr);
i->Destroy(); i->Destroy();
} }
for (auto&& o : outputs) { for (auto&& o : outputs) {
o->SetData(nullptr);
o->Destroy(); o->Destroy();
} }
} }
...@@ -332,10 +329,10 @@ void MagicMindRuntimeOpr::init_output_dtype() { ...@@ -332,10 +329,10 @@ void MagicMindRuntimeOpr::init_output_dtype() {
} }
std::vector<DataType> out_dtypes = m_model->GetOutputDataTypes(); std::vector<DataType> out_dtypes = m_model->GetOutputDataTypes();
mgb_assert( mgb_assert(
out_dtypes.size() == output().size(), out_dtypes.size() + 1 == output().size(),
"output size mismatch(got:%zu,expected:%zu)", out_dtypes.size(), "output size mismatch(got:%zu,expected:%zu)", out_dtypes.size(),
output().size()); output().size());
size_t nr_outputs = output().size(); size_t nr_outputs = out_dtypes.size();
for (size_t i = 0; i < nr_outputs; ++i) { for (size_t i = 0; i < nr_outputs; ++i) {
auto dt_mm = mm_dtype_to_mgb_dtype(out_dtypes[i]); auto dt_mm = mm_dtype_to_mgb_dtype(out_dtypes[i]);
mgb_assert( mgb_assert(
......
...@@ -90,8 +90,8 @@ public: ...@@ -90,8 +90,8 @@ public:
private: private:
CambriconAllocatorPtr m_allocator; CambriconAllocatorPtr m_allocator;
mutable IContextPtr m_context;
IEnginePtr m_engine; IEnginePtr m_engine;
mutable IContextPtr m_context;
IModelPtr m_model; IModelPtr m_model;
}; };
......
...@@ -135,10 +135,10 @@ public: ...@@ -135,10 +135,10 @@ public:
constexpr int kh = 3, kw = 3; constexpr int kh = 3, kw = 3;
constexpr int stride_h = 1, stride_w = 1; constexpr int stride_h = 1, stride_w = 1;
constexpr int pad_h = 1, pad_w = 1; constexpr int pad_h = 1, pad_w = 1;
magicmind::Dims input_dim{{ni, ci, hi, wi}}; magicmind::Dims input_dim{{ni, hi, wi, ci}};
magicmind::Dims filter_dim{{co, ci, kh, kw}}; magicmind::Dims filter_dim{{co, kh, kw, ci}};
magicmind::Dims bias_dim{{co}}; magicmind::Dims bias_dim{{co}};
magicmind::Dims add_dim{{no, co, ho, wo}}; magicmind::Dims add_dim{{no, ho, wo, co}};
magicmind::DataType output_datatype = magicmind::DataType::FLOAT32; magicmind::DataType output_datatype = magicmind::DataType::FLOAT32;
// init // init
...@@ -148,13 +148,13 @@ public: ...@@ -148,13 +148,13 @@ public:
{ {
"graph_shape_mutable": {{GRAPH_SHAPE_MUTABLE}}, "graph_shape_mutable": {{GRAPH_SHAPE_MUTABLE}},
"precision_config": { "precision_config": {
"precision_mode": "qint8_mixed_float16" "precision_mode": "qint8_mixed_float32"
} }
} }
)"; )";
replace_all_pairs_inplace( replace_all_pairs_inplace(
user_json_config, user_json_config,
{{"{{GRAPH_SHAPE_MUTABLE}}", std::to_string(graph_shape_mutable_)}}); {{"{{GRAPH_SHAPE_MUTABLE}}", graph_shape_mutable_ ? "true" : "false"}});
config->ParseFromString(user_json_config); config->ParseFromString(user_json_config);
auto network = make_mm_unique_ptr(magicmind::CreateINetwork()); auto network = make_mm_unique_ptr(magicmind::CreateINetwork());
magicmind::Range filter_range = {0.0f, 0.0f}; magicmind::Range filter_range = {0.0f, 0.0f};
...@@ -278,6 +278,9 @@ public: ...@@ -278,6 +278,9 @@ public:
std::string buf; std::string buf;
buf.resize(size); buf.resize(size);
MM_CHECK(model_->SerializeToMemory(reinterpret_cast<void*>(buf.data()), size)); MM_CHECK(model_->SerializeToMemory(reinterpret_cast<void*>(buf.data()), size));
model_.reset();
model_ = std::move(MagicMindRuntimeOpr::make_model_ptr(CreateIModel()));
model_->DeserializeFromMemory(reinterpret_cast<void*>(buf.data()), size);
if (serialize_to_file) { if (serialize_to_file) {
std::string fname = ssprintf( std::string fname = ssprintf(
"./output/MagicMindRuntimeOprTest.%s.mlu", "./output/MagicMindRuntimeOprTest.%s.mlu",
...@@ -332,6 +335,10 @@ public: ...@@ -332,6 +335,10 @@ public:
printf("inference time = %.2fs\n", time / static_cast<float>(runs) * 1e-3); printf("inference time = %.2fs\n", time / static_cast<float>(runs) * 1e-3);
MGB_CNRT_CHECK(cnrtDestroyNotifier(&start)); MGB_CNRT_CHECK(cnrtDestroyNotifier(&start));
MGB_CNRT_CHECK(cnrtDestroyNotifier(&end)); MGB_CNRT_CHECK(cnrtDestroyNotifier(&end));
for (auto&& i : input_tensors)
i->Destroy();
for (auto&& o : output_tensors)
o->Destroy();
} }
}; };
} // namespace } // namespace
...@@ -387,9 +394,9 @@ TEST(TestMagicMindRuntimeOpr, Basic) { ...@@ -387,9 +394,9 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
add_output_mlu_ptr, mlu_deleter}; add_output_mlu_ptr, mlu_deleter};
network.infer_model( network.infer_model(
{conv_input_mlu_ptr, add_output_mlu_ptr}, {conv_input_mlu_ptr, add_input_mlu_ptr},
{relu_output_mlu_ptr, add_output_mlu_ptr}, {relu_output_mlu_ptr, add_output_mlu_ptr},
{Dims{{ni, ci, hi, wi}}, Dims{{no, co, ho, wo}}}); {Dims{{ni, hi, wi, ci}}, Dims{{no, ho, wo, co}}});
// result memory copy cnml->cpu // result memory copy cnml->cpu
// memory copy cpu->mlu // memory copy cpu->mlu
...@@ -402,9 +409,9 @@ TEST(TestMagicMindRuntimeOpr, Basic) { ...@@ -402,9 +409,9 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
auto buf = network.get_serialized_model(false); auto buf = network.get_serialized_model(false);
auto x = std::make_shared<HostTensorND>( auto x = std::make_shared<HostTensorND>(
cn, TensorLayout{{ni, ci, hi, wi}, dtype::Float32()}); cn, TensorLayout{{ni, hi, wi, ci}, dtype::Float32()});
auto add = std::make_shared<HostTensorND>( auto add = std::make_shared<HostTensorND>(
cn, TensorLayout{{no, co, ho, wo}, dtype::Float32()}); cn, TensorLayout{{no, ho, wo, co}, dtype::Float32()});
std::memcpy( std::memcpy(
reinterpret_cast<void*>(x->ptr<dt_float32>()), conv_input_cpu_data.data(), reinterpret_cast<void*>(x->ptr<dt_float32>()), conv_input_cpu_data.data(),
conv_input_count * sizeof(float)); conv_input_count * sizeof(float));
...@@ -418,13 +425,13 @@ TEST(TestMagicMindRuntimeOpr, Basic) { ...@@ -418,13 +425,13 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
reinterpret_cast<const void*>(buf.data()), buf.size(), {x_, add_}); reinterpret_cast<const void*>(buf.data()), buf.size(), {x_, add_});
auto out1 = outs[0]; auto out1 = outs[0];
auto out2 = outs[1]; auto out2 = outs[1];
HostTensorND o1(cn, {no, co, ho, wo}, dtype::Float32()); HostTensorND o1(cn, {no, ho, wo, co}, dtype::Float32());
HostTensorND o2(cn, {no, co, ho, wo}, dtype::Float32()); HostTensorND o2(cn, {no, ho, wo, co}, dtype::Float32());
auto func = graph->compile( auto func = graph->compile(
{make_callback_copy(out1, o1), make_callback_copy(out2, o2)}); {make_callback_copy(out1, o1), make_callback_copy(out2, o2)});
func->execute(); func->execute();
HostTensorND o1_mm(cn, {no, co, ho, wo}, dtype::Float32()), HostTensorND o1_mm(cn, {no, ho, wo, co}, dtype::Float32()),
o2_mm(cn, {no, co, ho, wo}, dtype::Float32()); o2_mm(cn, {no, ho, wo, co}, dtype::Float32());
std::memcpy( std::memcpy(
o1_mm.ptr<float>(), relu_output_cpu_data.data(), o1_mm.ptr<float>(), relu_output_cpu_data.data(),
relu_output_count * sizeof(float)); relu_output_count * sizeof(float));
...@@ -486,9 +493,9 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) { ...@@ -486,9 +493,9 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
add_output_mlu_ptr, mlu_deleter}; add_output_mlu_ptr, mlu_deleter};
network.infer_model( network.infer_model(
{conv_input_mlu_ptr, add_output_mlu_ptr}, {conv_input_mlu_ptr, add_input_mlu_ptr},
{relu_output_mlu_ptr, add_output_mlu_ptr}, {relu_output_mlu_ptr, add_output_mlu_ptr},
{Dims{{ni, ci, hi, wi}}, Dims{{no, co, ho, wo}}}); {Dims{{ni, hi, wi, ci}}, Dims{{no, ho, wo, co}}});
// result memory copy cnml->cpu // result memory copy cnml->cpu
// memory copy cpu->mlu // memory copy cpu->mlu
...@@ -501,9 +508,9 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) { ...@@ -501,9 +508,9 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
auto buf = network.get_serialized_model(false); auto buf = network.get_serialized_model(false);
auto x = std::make_shared<HostTensorND>( auto x = std::make_shared<HostTensorND>(
cn, TensorLayout{{ni, ci, hi, wi}, dtype::QuantizedS8{1.f}}); cn, TensorLayout{{ni, hi, wi, ci}, dtype::QuantizedS8{1.f}});
auto add = std::make_shared<HostTensorND>( auto add = std::make_shared<HostTensorND>(
cn, TensorLayout{{no, co, ho, wo}, dtype::Float32()}); cn, TensorLayout{{no, ho, wo, co}, dtype::Float32()});
std::memcpy( std::memcpy(
reinterpret_cast<void*>(x->raw_ptr()), conv_input_cpu_data.data(), reinterpret_cast<void*>(x->raw_ptr()), conv_input_cpu_data.data(),
conv_input_count * sizeof(int8_t)); conv_input_count * sizeof(int8_t));
...@@ -517,13 +524,13 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) { ...@@ -517,13 +524,13 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
reinterpret_cast<const void*>(buf.data()), buf.size(), {x_, add_}); reinterpret_cast<const void*>(buf.data()), buf.size(), {x_, add_});
auto out1 = outs[0]; auto out1 = outs[0];
auto out2 = outs[1]; auto out2 = outs[1];
HostTensorND o1(cn, {no, co, ho, wo}, dtype::Float32()); HostTensorND o1(cn, {no, ho, wo, co}, dtype::Float32());
HostTensorND o2(cn, {no, co, ho, wo}, dtype::Float32()); HostTensorND o2(cn, {no, ho, wo, co}, dtype::Float32());
auto func = graph->compile( auto func = graph->compile(
{make_callback_copy(out1, o1), make_callback_copy(out2, o2)}); {make_callback_copy(out1, o1), make_callback_copy(out2, o2)});
func->execute(); func->execute();
HostTensorND o1_mm(cn, {no, co, ho, wo}, dtype::Float32()), HostTensorND o1_mm(cn, {no, ho, wo, co}, dtype::Float32()),
o2_mm(cn, {no, co, ho, wo}, dtype::Float32()); o2_mm(cn, {no, ho, wo, co}, dtype::Float32());
std::memcpy( std::memcpy(
o1_mm.ptr<float>(), relu_output_cpu_data.data(), o1_mm.ptr<float>(), relu_output_cpu_data.data(),
relu_output_count * sizeof(float)); relu_output_count * sizeof(float));
...@@ -591,9 +598,9 @@ TEST(TestMagicMindRuntimeOpr, GraphShapeMutable) { ...@@ -591,9 +598,9 @@ TEST(TestMagicMindRuntimeOpr, GraphShapeMutable) {
add_output_mlu_ptr, mlu_deleter}; add_output_mlu_ptr, mlu_deleter};
network.infer_model( network.infer_model(
{conv_input_mlu_ptr, add_output_mlu_ptr}, {conv_input_mlu_ptr, add_input_mlu_ptr},
{relu_output_mlu_ptr, add_output_mlu_ptr}, {relu_output_mlu_ptr, add_output_mlu_ptr},
{Dims{{ni, ci, hi, wi}}, Dims{{no, co, ho, wo}}}); {Dims{{ni, hi, wi, ci}}, Dims{{no, ho, wo, co}}});
// result memory copy cnml->cpu // result memory copy cnml->cpu
// memory copy cpu->mlu // memory copy cpu->mlu
...@@ -607,11 +614,11 @@ TEST(TestMagicMindRuntimeOpr, GraphShapeMutable) { ...@@ -607,11 +614,11 @@ TEST(TestMagicMindRuntimeOpr, GraphShapeMutable) {
auto buf = network.get_serialized_model(true); auto buf = network.get_serialized_model(true);
auto mkshp = [](int n, int c, int h, int w) { auto mkshp = [](int n, int c, int h, int w) {
size_t nz = n, cz = c, hz = h, wz = w; size_t nz = n, cz = c, hz = h, wz = w;
return TensorShape{nz, cz, hz, wz}; return TensorShape{nz, hz, wz, cz};
}; };
auto mkly = [](int n, int c, int h, int w, DType dtype) { auto mkly = [](int n, int c, int h, int w, DType dtype) {
size_t nz = n, cz = c, hz = h, wz = w; size_t nz = n, cz = c, hz = h, wz = w;
return TensorLayout{{nz, cz, hz, wz}, dtype}; return TensorLayout{{nz, hz, wz, cz}, dtype};
}; };
auto x = std::make_shared<HostTensorND>( auto x = std::make_shared<HostTensorND>(
cn, mkly(ni, ci, hi, wi, dtype::Float32())); cn, mkly(ni, ci, hi, wi, dtype::Float32()));
...@@ -662,9 +669,9 @@ TEST(TestMagicMindRuntimeOpr, Serialization) { ...@@ -662,9 +669,9 @@ TEST(TestMagicMindRuntimeOpr, Serialization) {
const int ni = 1, ci = 64, hi = 32, wi = 32; const int ni = 1, ci = 64, hi = 32, wi = 32;
const int no = 1, co = 64, ho = 32, wo = 32; const int no = 1, co = 64, ho = 32, wo = 32;
auto x = std::make_shared<HostTensorND>( auto x = std::make_shared<HostTensorND>(
cn, TensorLayout{{ni, ci, hi, wi}, dtype::Float32()}); cn, TensorLayout{{ni, hi, wi, ci}, dtype::Float32()});
auto add = std::make_shared<HostTensorND>( auto add = std::make_shared<HostTensorND>(
cn, TensorLayout{{no, co, ho, wo}, dtype::Float32()}); cn, TensorLayout{{no, ho, wo, co}, dtype::Float32()});
auto graph = ComputingGraph::make(); auto graph = ComputingGraph::make();
auto x_ = opr::Host2DeviceCopy::make(*graph, x); auto x_ = opr::Host2DeviceCopy::make(*graph, x);
auto add_ = opr::Host2DeviceCopy::make(*graph, add); auto add_ = opr::Host2DeviceCopy::make(*graph, add);
...@@ -693,11 +700,11 @@ TEST(TestMagicMindRuntimeOpr, Profiling) { ...@@ -693,11 +700,11 @@ TEST(TestMagicMindRuntimeOpr, Profiling) {
MMNetwork network(cn, magicmind::DataType::FLOAT32, true); MMNetwork network(cn, magicmind::DataType::FLOAT32, true);
auto buf = network.get_serialized_model(false); auto buf = network.get_serialized_model(false);
const int ni = 8, ci = 64, hi = 32, wi = 32; const int ni = 8, ci = 64, hi = 32, wi = 32;
const int no = 1, co = 64, ho = 32, wo = 32; const int no = 8, co = 64, ho = 32, wo = 32;
HostTensorGenerator<dtype::Float32, RandomDistribution::GAUSSIAN> gen(0, 1); HostTensorGenerator<dtype::Float32, RandomDistribution::GAUSSIAN> gen(0, 1);
auto x = gen({ni, ci, hi, wi}, cn); auto x = gen({ni, hi, wi, ci}, cn);
auto add = gen({no, co, ho, wo}, cn); auto add = gen({no, ho, wo, co}, cn);
auto graph = ComputingGraph::make(); auto graph = ComputingGraph::make();
GraphProfiler profiler{graph.get()}; GraphProfiler profiler{graph.get()};
...@@ -708,8 +715,8 @@ TEST(TestMagicMindRuntimeOpr, Profiling) { ...@@ -708,8 +715,8 @@ TEST(TestMagicMindRuntimeOpr, Profiling) {
auto out1 = outs[0]; auto out1 = outs[0];
auto out2 = outs[1]; auto out2 = outs[1];
graph->options().var_sanity_check_first_run = false; graph->options().var_sanity_check_first_run = false;
HostTensorND o1(cn, {no, co, ho, wo}, dtype::Float32()); HostTensorND o1(cn, {no, ho, wo, co}, dtype::Float32());
HostTensorND o2(cn, {no, co, ho, wo}, dtype::Float32()); HostTensorND o2(cn, {no, ho, wo, co}, dtype::Float32());
auto func = graph->compile( auto func = graph->compile(
{make_callback_copy(out1, o1), make_callback_copy(out2, o2)}); {make_callback_copy(out1, o1), make_callback_copy(out2, o2)});
func->execute(); func->execute();
...@@ -768,9 +775,9 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) { ...@@ -768,9 +775,9 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
add_output_mlu_ptr, mlu_deleter}; add_output_mlu_ptr, mlu_deleter};
network.infer_model( network.infer_model(
{conv_input_mlu_ptr, add_output_mlu_ptr}, {conv_input_mlu_ptr, add_input_mlu_ptr},
{relu_output_mlu_ptr, add_output_mlu_ptr}, {relu_output_mlu_ptr, add_output_mlu_ptr},
{Dims{{ni, ci, hi, wi}}, Dims{{no, co, ho, wo}}}); {Dims{{ni, hi, wi, ci}}, Dims{{no, ho, wo, co}}});
// result memory copy cnml->cpu // result memory copy cnml->cpu
// memory copy cpu->mlu // memory copy cpu->mlu
...@@ -784,9 +791,9 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) { ...@@ -784,9 +791,9 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
auto cn_cpu = CompNode::load("cpu0"); auto cn_cpu = CompNode::load("cpu0");
auto buf = network.get_serialized_model(false); auto buf = network.get_serialized_model(false);
auto x = std::make_shared<HostTensorND>( auto x = std::make_shared<HostTensorND>(
cn_cpu, TensorLayout{{ni, ci, hi, wi}, dtype::Float32()}); cn_cpu, TensorLayout{{ni, hi, wi, ci}, dtype::Float32()});
auto add = std::make_shared<HostTensorND>( auto add = std::make_shared<HostTensorND>(
cn_cpu, TensorLayout{{no, co, ho, wo}, dtype::Float32()}); cn_cpu, TensorLayout{{no, ho, wo, co}, dtype::Float32()});
std::memcpy( std::memcpy(
reinterpret_cast<void*>(x->ptr<dt_float32>()), conv_input_cpu_data.data(), reinterpret_cast<void*>(x->ptr<dt_float32>()), conv_input_cpu_data.data(),
conv_input_count * sizeof(float)); conv_input_count * sizeof(float));
...@@ -802,13 +809,13 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) { ...@@ -802,13 +809,13 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
reinterpret_cast<const void*>(buf.data()), buf.size(), {x_, add_}); reinterpret_cast<const void*>(buf.data()), buf.size(), {x_, add_});
auto out1 = outs[0]; auto out1 = outs[0];
auto out2 = outs[1]; auto out2 = outs[1];
HostTensorND o1(cn, {no, co, ho, wo}, dtype::Float32()); HostTensorND o1(CompNode::default_cpu(), {no, ho, wo, co}, dtype::Float32());
HostTensorND o2(cn, {no, co, ho, wo}, dtype::Float32()); HostTensorND o2(CompNode::default_cpu(), {no, ho, wo, co}, dtype::Float32());
auto func = graph->compile( auto func = graph->compile(
{make_callback_copy(out1, o1), make_callback_copy(out2, o2)}); {make_callback_copy(out1, o1), make_callback_copy(out2, o2)});
func->execute(); func->execute();
HostTensorND o1_mm(cn, {no, co, ho, wo}, dtype::Float32()), HostTensorND o1_mm(cn, {no, ho, wo, co}, dtype::Float32()),
o2_mm(cn, {no, co, ho, wo}, dtype::Float32()); o2_mm(cn, {no, ho, wo, co}, dtype::Float32());
std::memcpy( std::memcpy(
o1_mm.ptr<float>(), relu_output_cpu_data.data(), o1_mm.ptr<float>(), relu_output_cpu_data.data(),
relu_output_count * sizeof(float)); relu_output_count * sizeof(float));
......
...@@ -388,6 +388,13 @@ def CambriconRuntime: MgbHashableOp<"CambriconRuntime"> { ...@@ -388,6 +388,13 @@ def CambriconRuntime: MgbHashableOp<"CambriconRuntime"> {
); );
} }
// Op definition for MagicMindRuntime with two extra arguments: a string
// attribute `buf` and a `MgbSizeTAddr` argument `buf_size`.
// NOTE(review): presumably `buf` holds the serialized MagicMind model and
// `buf_size` its length in bytes -- confirm against the callers that
// construct this op.
def MagicMindRuntime: MgbHashableOp<"MagicMindRuntime"> {
let extraArguments = (ins
MgbStringAttr:$buf,
MgbSizeTAddr:$buf_size
);
}
def CvtColor: MgbHashableOp<"CvtColor", [CvtColorParam]>; def CvtColor: MgbHashableOp<"CvtColor", [CvtColorParam]>;
def CheckNonFinite: MgbHashableOp<"CheckNonFinite", [EmptyParam]>; def CheckNonFinite: MgbHashableOp<"CheckNonFinite", [EmptyParam]>;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册