Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
f398c8e6
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
f398c8e6
编写于
10月 29, 2021
作者:
M
Megvii Engine Team
提交者:
XindaH
12月 16, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(imperative): add magicmind runtime opr
GitOrigin-RevId: 02ddb886d959104c3aa8320e3ca903e71476c814
上级
f06c890e
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
133 addition
and
50 deletion
+133
-50
imperative/python/megengine/functional/external.py
imperative/python/megengine/functional/external.py
+12
-0
imperative/python/megengine/module/external.py
imperative/python/megengine/module/external.py
+24
-0
imperative/src/impl/ops/magicmind_runtime.cpp
imperative/src/impl/ops/magicmind_runtime.cpp
+36
-0
src/cambricon/impl/magicmind_runtime_opr.cpp
src/cambricon/impl/magicmind_runtime_opr.cpp
+6
-9
src/cambricon/include/megbrain/cambricon/magicmind_runtime_opr.h
...bricon/include/megbrain/cambricon/magicmind_runtime_opr.h
+1
-1
src/cambricon/test/magicmind_runtime_opr.cpp
src/cambricon/test/magicmind_runtime_opr.cpp
+47
-40
src/core/include/megbrain/ir/ops.td
src/core/include/megbrain/ir/ops.td
+7
-0
未找到文件。
imperative/python/megengine/functional/external.py
浏览文件 @
f398c8e6
...
@@ -66,3 +66,15 @@ def atlas_runtime_opr(inputs, data):
...
@@ -66,3 +66,15 @@ def atlas_runtime_opr(inputs, data):
op
=
builtin
.
AtlasRuntime
(
data
,
len
(
data
))
op
=
builtin
.
AtlasRuntime
(
data
,
len
(
data
))
return
apply
(
op
,
*
inputs
)
return
apply
(
op
,
*
inputs
)
def magicmind_runtime_opr(inputs, data):
    r"""Run a serialized MagicMind model as a runtime operator in MegEngine.

    A ``builtin.MagicMindRuntime`` op is created from the model buffer and
    its length, then applied to the given inputs.

    Args:
        inputs: list of input tensors.
        data: the serialized MagicMind model.

    Returns:
        Whatever ``apply`` returns for the constructed op and ``inputs``.
    """
    # The op takes the buffer together with its explicit size.
    model_size = len(data)
    runtime_op = builtin.MagicMindRuntime(data, model_size)
    return apply(runtime_op, *inputs)
imperative/python/megengine/module/external.py
浏览文件 @
f398c8e6
...
@@ -130,3 +130,27 @@ class AtlasRuntimeSubgraph(Module):
...
@@ -130,3 +130,27 @@ class AtlasRuntimeSubgraph(Module):
def
forward
(
self
,
*
inputs
):
def
forward
(
self
,
*
inputs
):
return
atlas_runtime_opr
(
inputs
,
data
=
self
.
_data
)
return
atlas_runtime_opr
(
inputs
,
data
=
self
.
_data
)
class MagicMindRuntimeSubgraph(Module):
    r"""Module wrapper around a serialized MagicMindRuntime subgraph.

    The serialized model is kept on the instance and handed to
    :func:`~.magicmind_runtime_opr` on every forward call.
    See :func:`~.magicmind_runtime_opr` for more details.
    """

    def __init__(self, data, **kwargs):
        # Remaining keyword arguments are forwarded to the Module base class.
        super().__init__(**kwargs)
        # NOTE(review): the buffer is stored exactly as passed in here, whereas
        # the ``data`` setter below converts it with ``np.frombuffer`` --
        # confirm that this asymmetry is intended.
        self._data = data

    @property
    def data(self):
        """The serialized model buffer currently held by this module."""
        return self._data

    @data.setter
    def data(self, val):
        # Reinterpret the incoming buffer as a flat uint8 array.
        as_bytes = np.frombuffer(val, dtype=np.uint8)
        self._data = as_bytes

    def forward(self, *inputs):
        """Apply the stored MagicMind model to ``inputs``."""
        model = self._data
        return magicmind_runtime_opr(inputs, data=model)
imperative/src/impl/ops/magicmind_runtime.cpp
0 → 100644
浏览文件 @
f398c8e6
/**
* \file imperative/src/impl/ops/magicmind_runtime.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "../op_trait.h"
#include "megbrain/imperative/ops/autogen.h"
#if MGB_CAMBRICON
#include "megbrain/cambricon/magicmind_runtime_opr.h"
namespace
mgb
::
imperative
{
namespace
{
namespace
magicmind_runtime
{
auto
apply_on_var_node
(
const
OpDef
&
def
,
const
VarNodeArray
&
inputs
)
{
auto
&&
op
=
static_cast
<
const
MagicMindRuntime
&>
(
def
);
SymbolVarArray
symbol_var_inputs
(
inputs
.
begin
(),
inputs
.
end
());
OperatorNodeConfig
config
{
op
.
make_name
()};
return
opr
::
MagicMindRuntimeOpr
::
make
(
op
.
buf
.
c_str
(),
op
.
buf_size
,
symbol_var_inputs
,
config
);
}
OP_TRAIT_REG
(
MagicMindRuntime
,
MagicMindRuntime
)
.
apply_on_var_node
(
apply_on_var_node
)
.
fallback
();
}
// namespace magicmind_runtime
}
// namespace
}
// namespace mgb::imperative
#endif
src/cambricon/impl/magicmind_runtime_opr.cpp
浏览文件 @
f398c8e6
...
@@ -166,8 +166,8 @@ MagicMindRuntimeOpr::MagicMindRuntimeOpr(
...
@@ -166,8 +166,8 @@ MagicMindRuntimeOpr::MagicMindRuntimeOpr(
const
OperatorNodeConfig
&
config
)
const
OperatorNodeConfig
&
config
)
:
Super
(
inputs
[
0
]
->
owner_graph
(),
config
,
"magic_runtime"
,
inputs
),
:
Super
(
inputs
[
0
]
->
owner_graph
(),
config
,
"magic_runtime"
,
inputs
),
m_allocator
{
std
::
move
(
allocator
)},
m_allocator
{
std
::
move
(
allocator
)},
m_context
{
nullptr
},
m_engine
{
nullptr
},
m_engine
{
nullptr
},
m_context
{
nullptr
},
m_model
{
std
::
move
(
model
)}
{
m_model
{
std
::
move
(
model
)}
{
mgb_assert
(
mgb_assert
(
inputs
[
0
]
->
comp_node
().
device_type
()
==
CompNode
::
DeviceType
::
CAMBRICON
,
inputs
[
0
]
->
comp_node
().
device_type
()
==
CompNode
::
DeviceType
::
CAMBRICON
,
...
@@ -207,7 +207,7 @@ void MagicMindRuntimeOpr::scn_do_execute() {
...
@@ -207,7 +207,7 @@ void MagicMindRuntimeOpr::scn_do_execute() {
cnrt_env
.
activate
();
cnrt_env
.
activate
();
std
::
vector
<
IRTTensor
*>
inputs
,
outputs
;
std
::
vector
<
IRTTensor
*>
inputs
,
outputs
;
MM_CHECK
(
CreateInputTensors
(
m_context
.
get
(),
&
inputs
));
MM_CHECK
(
CreateInputTensors
(
m_context
.
get
(),
&
inputs
));
MM_CHECK
(
Create
In
putTensors
(
m_context
.
get
(),
&
outputs
));
MM_CHECK
(
Create
Out
putTensors
(
m_context
.
get
(),
&
outputs
));
size_t
nr_inputs
=
input
().
size
();
size_t
nr_inputs
=
input
().
size
();
mgb_assert
(
nr_inputs
==
inputs
.
size
());
mgb_assert
(
nr_inputs
==
inputs
.
size
());
for
(
size_t
i
=
0
;
i
<
nr_inputs
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
nr_inputs
;
++
i
)
{
...
@@ -234,11 +234,9 @@ void MagicMindRuntimeOpr::scn_do_execute() {
...
@@ -234,11 +234,9 @@ void MagicMindRuntimeOpr::scn_do_execute() {
MM_CHECK
(
m_context
->
SetWorkspace
(
output
().
back
()
->
dev_tensor
().
raw_ptr
(),
size
));
MM_CHECK
(
m_context
->
SetWorkspace
(
output
().
back
()
->
dev_tensor
().
raw_ptr
(),
size
));
MM_CHECK
(
m_context
->
Enqueue
(
inputs
,
outputs
,
cnrt_env
.
queue
));
MM_CHECK
(
m_context
->
Enqueue
(
inputs
,
outputs
,
cnrt_env
.
queue
));
for
(
auto
&&
i
:
inputs
)
{
for
(
auto
&&
i
:
inputs
)
{
i
->
SetData
(
nullptr
);
i
->
Destroy
();
i
->
Destroy
();
}
}
for
(
auto
&&
o
:
outputs
)
{
for
(
auto
&&
o
:
outputs
)
{
o
->
SetData
(
nullptr
);
o
->
Destroy
();
o
->
Destroy
();
}
}
}
}
...
@@ -260,7 +258,7 @@ void MagicMindRuntimeOpr::get_output_var_shape(
...
@@ -260,7 +258,7 @@ void MagicMindRuntimeOpr::get_output_var_shape(
}
}
std
::
vector
<
IRTTensor
*>
inputs
,
outputs
;
std
::
vector
<
IRTTensor
*>
inputs
,
outputs
;
MM_CHECK
(
CreateInputTensors
(
m_context
.
get
(),
&
inputs
));
MM_CHECK
(
CreateInputTensors
(
m_context
.
get
(),
&
inputs
));
MM_CHECK
(
Create
In
putTensors
(
m_context
.
get
(),
&
outputs
));
MM_CHECK
(
Create
Out
putTensors
(
m_context
.
get
(),
&
outputs
));
size_t
nr_inputs
=
input
().
size
();
size_t
nr_inputs
=
input
().
size
();
mgb_assert
(
nr_inputs
==
inputs
.
size
());
mgb_assert
(
nr_inputs
==
inputs
.
size
());
for
(
size_t
i
=
0
;
i
<
nr_inputs
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
nr_inputs
;
++
i
)
{
...
@@ -295,12 +293,11 @@ void MagicMindRuntimeOpr::get_output_var_shape(
...
@@ -295,12 +293,11 @@ void MagicMindRuntimeOpr::get_output_var_shape(
false
,
"static shape infer for MagicMindRuntimeOpr(%s) failed"
,
false
,
"static shape infer for MagicMindRuntimeOpr(%s) failed"
,
cname
());
cname
());
}
}
return
;
for
(
auto
&&
i
:
inputs
)
{
for
(
auto
&&
i
:
inputs
)
{
i
->
SetData
(
nullptr
);
i
->
Destroy
();
i
->
Destroy
();
}
}
for
(
auto
&&
o
:
outputs
)
{
for
(
auto
&&
o
:
outputs
)
{
o
->
SetData
(
nullptr
);
o
->
Destroy
();
o
->
Destroy
();
}
}
}
}
...
@@ -332,10 +329,10 @@ void MagicMindRuntimeOpr::init_output_dtype() {
...
@@ -332,10 +329,10 @@ void MagicMindRuntimeOpr::init_output_dtype() {
}
}
std
::
vector
<
DataType
>
out_dtypes
=
m_model
->
GetOutputDataTypes
();
std
::
vector
<
DataType
>
out_dtypes
=
m_model
->
GetOutputDataTypes
();
mgb_assert
(
mgb_assert
(
out_dtypes
.
size
()
==
output
().
size
(),
out_dtypes
.
size
()
+
1
==
output
().
size
(),
"output size mismatch(got:%zu,expected:%zu)"
,
out_dtypes
.
size
(),
"output size mismatch(got:%zu,expected:%zu)"
,
out_dtypes
.
size
(),
output
().
size
());
output
().
size
());
size_t
nr_outputs
=
out
put
()
.
size
();
size_t
nr_outputs
=
out
_dtypes
.
size
();
for
(
size_t
i
=
0
;
i
<
nr_outputs
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
nr_outputs
;
++
i
)
{
auto
dt_mm
=
mm_dtype_to_mgb_dtype
(
out_dtypes
[
i
]);
auto
dt_mm
=
mm_dtype_to_mgb_dtype
(
out_dtypes
[
i
]);
mgb_assert
(
mgb_assert
(
...
...
src/cambricon/include/megbrain/cambricon/magicmind_runtime_opr.h
浏览文件 @
f398c8e6
...
@@ -90,8 +90,8 @@ public:
...
@@ -90,8 +90,8 @@ public:
private:
private:
CambriconAllocatorPtr
m_allocator
;
CambriconAllocatorPtr
m_allocator
;
mutable
IContextPtr
m_context
;
IEnginePtr
m_engine
;
IEnginePtr
m_engine
;
mutable
IContextPtr
m_context
;
IModelPtr
m_model
;
IModelPtr
m_model
;
};
};
...
...
src/cambricon/test/magicmind_runtime_opr.cpp
浏览文件 @
f398c8e6
...
@@ -135,10 +135,10 @@ public:
...
@@ -135,10 +135,10 @@ public:
constexpr
int
kh
=
3
,
kw
=
3
;
constexpr
int
kh
=
3
,
kw
=
3
;
constexpr
int
stride_h
=
1
,
stride_w
=
1
;
constexpr
int
stride_h
=
1
,
stride_w
=
1
;
constexpr
int
pad_h
=
1
,
pad_w
=
1
;
constexpr
int
pad_h
=
1
,
pad_w
=
1
;
magicmind
::
Dims
input_dim
{{
ni
,
ci
,
hi
,
w
i
}};
magicmind
::
Dims
input_dim
{{
ni
,
hi
,
wi
,
c
i
}};
magicmind
::
Dims
filter_dim
{{
co
,
ci
,
kh
,
kw
}};
magicmind
::
Dims
filter_dim
{{
co
,
kh
,
kw
,
ci
}};
magicmind
::
Dims
bias_dim
{{
co
}};
magicmind
::
Dims
bias_dim
{{
co
}};
magicmind
::
Dims
add_dim
{{
no
,
co
,
ho
,
w
o
}};
magicmind
::
Dims
add_dim
{{
no
,
ho
,
wo
,
c
o
}};
magicmind
::
DataType
output_datatype
=
magicmind
::
DataType
::
FLOAT32
;
magicmind
::
DataType
output_datatype
=
magicmind
::
DataType
::
FLOAT32
;
// init
// init
...
@@ -148,13 +148,13 @@ public:
...
@@ -148,13 +148,13 @@ public:
{
{
"graph_shape_mutable": {{GRAPH_SHAPE_MUTABLE}},
"graph_shape_mutable": {{GRAPH_SHAPE_MUTABLE}},
"precision_config": {
"precision_config": {
"precision_mode": "qint8_mixed_float
16
"
"precision_mode": "qint8_mixed_float
32
"
}
}
}
}
)"
;
)"
;
replace_all_pairs_inplace
(
replace_all_pairs_inplace
(
user_json_config
,
user_json_config
,
{{
"{{GRAPH_SHAPE_MUTABLE}}"
,
std
::
to_string
(
graph_shape_mutable_
)
}});
{{
"{{GRAPH_SHAPE_MUTABLE}}"
,
graph_shape_mutable_
?
"true"
:
"false"
}});
config
->
ParseFromString
(
user_json_config
);
config
->
ParseFromString
(
user_json_config
);
auto
network
=
make_mm_unique_ptr
(
magicmind
::
CreateINetwork
());
auto
network
=
make_mm_unique_ptr
(
magicmind
::
CreateINetwork
());
magicmind
::
Range
filter_range
=
{
0.0
f
,
0.0
f
};
magicmind
::
Range
filter_range
=
{
0.0
f
,
0.0
f
};
...
@@ -278,6 +278,9 @@ public:
...
@@ -278,6 +278,9 @@ public:
std
::
string
buf
;
std
::
string
buf
;
buf
.
resize
(
size
);
buf
.
resize
(
size
);
MM_CHECK
(
model_
->
SerializeToMemory
(
reinterpret_cast
<
void
*>
(
buf
.
data
()),
size
));
MM_CHECK
(
model_
->
SerializeToMemory
(
reinterpret_cast
<
void
*>
(
buf
.
data
()),
size
));
model_
.
reset
();
model_
=
std
::
move
(
MagicMindRuntimeOpr
::
make_model_ptr
(
CreateIModel
()));
model_
->
DeserializeFromMemory
(
reinterpret_cast
<
void
*>
(
buf
.
data
()),
size
);
if
(
serialize_to_file
)
{
if
(
serialize_to_file
)
{
std
::
string
fname
=
ssprintf
(
std
::
string
fname
=
ssprintf
(
"./output/MagicMindRuntimeOprTest.%s.mlu"
,
"./output/MagicMindRuntimeOprTest.%s.mlu"
,
...
@@ -332,6 +335,10 @@ public:
...
@@ -332,6 +335,10 @@ public:
printf
(
"inference time = %.2fs
\n
"
,
time
/
static_cast
<
float
>
(
runs
)
*
1e-3
);
printf
(
"inference time = %.2fs
\n
"
,
time
/
static_cast
<
float
>
(
runs
)
*
1e-3
);
MGB_CNRT_CHECK
(
cnrtDestroyNotifier
(
&
start
));
MGB_CNRT_CHECK
(
cnrtDestroyNotifier
(
&
start
));
MGB_CNRT_CHECK
(
cnrtDestroyNotifier
(
&
end
));
MGB_CNRT_CHECK
(
cnrtDestroyNotifier
(
&
end
));
for
(
auto
&&
i
:
input_tensors
)
i
->
Destroy
();
for
(
auto
&&
o
:
output_tensors
)
o
->
Destroy
();
}
}
};
};
}
// namespace
}
// namespace
...
@@ -387,9 +394,9 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
...
@@ -387,9 +394,9 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
add_output_mlu_ptr
,
mlu_deleter
};
add_output_mlu_ptr
,
mlu_deleter
};
network
.
infer_model
(
network
.
infer_model
(
{
conv_input_mlu_ptr
,
add_
out
put_mlu_ptr
},
{
conv_input_mlu_ptr
,
add_
in
put_mlu_ptr
},
{
relu_output_mlu_ptr
,
add_output_mlu_ptr
},
{
relu_output_mlu_ptr
,
add_output_mlu_ptr
},
{
Dims
{{
ni
,
ci
,
hi
,
wi
}},
Dims
{{
no
,
co
,
ho
,
w
o
}}});
{
Dims
{{
ni
,
hi
,
wi
,
ci
}},
Dims
{{
no
,
ho
,
wo
,
c
o
}}});
// result memory copy cnml->cpu
// result memory copy cnml->cpu
// memory copy cpu->mlu
// memory copy cpu->mlu
...
@@ -402,9 +409,9 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
...
@@ -402,9 +409,9 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
auto
buf
=
network
.
get_serialized_model
(
false
);
auto
buf
=
network
.
get_serialized_model
(
false
);
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
cn
,
TensorLayout
{{
ni
,
ci
,
hi
,
w
i
},
dtype
::
Float32
()});
cn
,
TensorLayout
{{
ni
,
hi
,
wi
,
c
i
},
dtype
::
Float32
()});
auto
add
=
std
::
make_shared
<
HostTensorND
>
(
auto
add
=
std
::
make_shared
<
HostTensorND
>
(
cn
,
TensorLayout
{{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
()});
cn
,
TensorLayout
{{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
()});
std
::
memcpy
(
std
::
memcpy
(
reinterpret_cast
<
void
*>
(
x
->
ptr
<
dt_float32
>
()),
conv_input_cpu_data
.
data
(),
reinterpret_cast
<
void
*>
(
x
->
ptr
<
dt_float32
>
()),
conv_input_cpu_data
.
data
(),
conv_input_count
*
sizeof
(
float
));
conv_input_count
*
sizeof
(
float
));
...
@@ -418,13 +425,13 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
...
@@ -418,13 +425,13 @@ TEST(TestMagicMindRuntimeOpr, Basic) {
reinterpret_cast
<
const
void
*>
(
buf
.
data
()),
buf
.
size
(),
{
x_
,
add_
});
reinterpret_cast
<
const
void
*>
(
buf
.
data
()),
buf
.
size
(),
{
x_
,
add_
});
auto
out1
=
outs
[
0
];
auto
out1
=
outs
[
0
];
auto
out2
=
outs
[
1
];
auto
out2
=
outs
[
1
];
HostTensorND
o1
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
HostTensorND
o1
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
HostTensorND
o2
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
HostTensorND
o2
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
auto
func
=
graph
->
compile
(
auto
func
=
graph
->
compile
(
{
make_callback_copy
(
out1
,
o1
),
make_callback_copy
(
out2
,
o2
)});
{
make_callback_copy
(
out1
,
o1
),
make_callback_copy
(
out2
,
o2
)});
func
->
execute
();
func
->
execute
();
HostTensorND
o1_mm
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
()),
HostTensorND
o1_mm
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
()),
o2_mm
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
o2_mm
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
std
::
memcpy
(
std
::
memcpy
(
o1_mm
.
ptr
<
float
>
(),
relu_output_cpu_data
.
data
(),
o1_mm
.
ptr
<
float
>
(),
relu_output_cpu_data
.
data
(),
relu_output_count
*
sizeof
(
float
));
relu_output_count
*
sizeof
(
float
));
...
@@ -486,9 +493,9 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
...
@@ -486,9 +493,9 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
add_output_mlu_ptr
,
mlu_deleter
};
add_output_mlu_ptr
,
mlu_deleter
};
network
.
infer_model
(
network
.
infer_model
(
{
conv_input_mlu_ptr
,
add_
out
put_mlu_ptr
},
{
conv_input_mlu_ptr
,
add_
in
put_mlu_ptr
},
{
relu_output_mlu_ptr
,
add_output_mlu_ptr
},
{
relu_output_mlu_ptr
,
add_output_mlu_ptr
},
{
Dims
{{
ni
,
ci
,
hi
,
wi
}},
Dims
{{
no
,
co
,
ho
,
w
o
}}});
{
Dims
{{
ni
,
hi
,
wi
,
ci
}},
Dims
{{
no
,
ho
,
wo
,
c
o
}}});
// result memory copy cnml->cpu
// result memory copy cnml->cpu
// memory copy cpu->mlu
// memory copy cpu->mlu
...
@@ -501,9 +508,9 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
...
@@ -501,9 +508,9 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
auto
buf
=
network
.
get_serialized_model
(
false
);
auto
buf
=
network
.
get_serialized_model
(
false
);
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
cn
,
TensorLayout
{{
ni
,
ci
,
hi
,
w
i
},
dtype
::
QuantizedS8
{
1.
f
}});
cn
,
TensorLayout
{{
ni
,
hi
,
wi
,
c
i
},
dtype
::
QuantizedS8
{
1.
f
}});
auto
add
=
std
::
make_shared
<
HostTensorND
>
(
auto
add
=
std
::
make_shared
<
HostTensorND
>
(
cn
,
TensorLayout
{{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
()});
cn
,
TensorLayout
{{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
()});
std
::
memcpy
(
std
::
memcpy
(
reinterpret_cast
<
void
*>
(
x
->
raw_ptr
()),
conv_input_cpu_data
.
data
(),
reinterpret_cast
<
void
*>
(
x
->
raw_ptr
()),
conv_input_cpu_data
.
data
(),
conv_input_count
*
sizeof
(
int8_t
));
conv_input_count
*
sizeof
(
int8_t
));
...
@@ -517,13 +524,13 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
...
@@ -517,13 +524,13 @@ TEST(TestMagicMindRuntimeOpr, InputQInt8) {
reinterpret_cast
<
const
void
*>
(
buf
.
data
()),
buf
.
size
(),
{
x_
,
add_
});
reinterpret_cast
<
const
void
*>
(
buf
.
data
()),
buf
.
size
(),
{
x_
,
add_
});
auto
out1
=
outs
[
0
];
auto
out1
=
outs
[
0
];
auto
out2
=
outs
[
1
];
auto
out2
=
outs
[
1
];
HostTensorND
o1
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
HostTensorND
o1
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
HostTensorND
o2
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
HostTensorND
o2
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
auto
func
=
graph
->
compile
(
auto
func
=
graph
->
compile
(
{
make_callback_copy
(
out1
,
o1
),
make_callback_copy
(
out2
,
o2
)});
{
make_callback_copy
(
out1
,
o1
),
make_callback_copy
(
out2
,
o2
)});
func
->
execute
();
func
->
execute
();
HostTensorND
o1_mm
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
()),
HostTensorND
o1_mm
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
()),
o2_mm
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
o2_mm
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
std
::
memcpy
(
std
::
memcpy
(
o1_mm
.
ptr
<
float
>
(),
relu_output_cpu_data
.
data
(),
o1_mm
.
ptr
<
float
>
(),
relu_output_cpu_data
.
data
(),
relu_output_count
*
sizeof
(
float
));
relu_output_count
*
sizeof
(
float
));
...
@@ -591,9 +598,9 @@ TEST(TestMagicMindRuntimeOpr, GraphShapeMutable) {
...
@@ -591,9 +598,9 @@ TEST(TestMagicMindRuntimeOpr, GraphShapeMutable) {
add_output_mlu_ptr
,
mlu_deleter
};
add_output_mlu_ptr
,
mlu_deleter
};
network
.
infer_model
(
network
.
infer_model
(
{
conv_input_mlu_ptr
,
add_
out
put_mlu_ptr
},
{
conv_input_mlu_ptr
,
add_
in
put_mlu_ptr
},
{
relu_output_mlu_ptr
,
add_output_mlu_ptr
},
{
relu_output_mlu_ptr
,
add_output_mlu_ptr
},
{
Dims
{{
ni
,
ci
,
hi
,
wi
}},
Dims
{{
no
,
co
,
ho
,
w
o
}}});
{
Dims
{{
ni
,
hi
,
wi
,
ci
}},
Dims
{{
no
,
ho
,
wo
,
c
o
}}});
// result memory copy cnml->cpu
// result memory copy cnml->cpu
// memory copy cpu->mlu
// memory copy cpu->mlu
...
@@ -607,11 +614,11 @@ TEST(TestMagicMindRuntimeOpr, GraphShapeMutable) {
...
@@ -607,11 +614,11 @@ TEST(TestMagicMindRuntimeOpr, GraphShapeMutable) {
auto
buf
=
network
.
get_serialized_model
(
true
);
auto
buf
=
network
.
get_serialized_model
(
true
);
auto
mkshp
=
[](
int
n
,
int
c
,
int
h
,
int
w
)
{
auto
mkshp
=
[](
int
n
,
int
c
,
int
h
,
int
w
)
{
size_t
nz
=
n
,
cz
=
c
,
hz
=
h
,
wz
=
w
;
size_t
nz
=
n
,
cz
=
c
,
hz
=
h
,
wz
=
w
;
return
TensorShape
{
nz
,
cz
,
hz
,
w
z
};
return
TensorShape
{
nz
,
hz
,
wz
,
c
z
};
};
};
auto
mkly
=
[](
int
n
,
int
c
,
int
h
,
int
w
,
DType
dtype
)
{
auto
mkly
=
[](
int
n
,
int
c
,
int
h
,
int
w
,
DType
dtype
)
{
size_t
nz
=
n
,
cz
=
c
,
hz
=
h
,
wz
=
w
;
size_t
nz
=
n
,
cz
=
c
,
hz
=
h
,
wz
=
w
;
return
TensorLayout
{{
nz
,
cz
,
hz
,
w
z
},
dtype
};
return
TensorLayout
{{
nz
,
hz
,
wz
,
c
z
},
dtype
};
};
};
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
cn
,
mkly
(
ni
,
ci
,
hi
,
wi
,
dtype
::
Float32
()));
cn
,
mkly
(
ni
,
ci
,
hi
,
wi
,
dtype
::
Float32
()));
...
@@ -662,9 +669,9 @@ TEST(TestMagicMindRuntimeOpr, Serialization) {
...
@@ -662,9 +669,9 @@ TEST(TestMagicMindRuntimeOpr, Serialization) {
const
int
ni
=
1
,
ci
=
64
,
hi
=
32
,
wi
=
32
;
const
int
ni
=
1
,
ci
=
64
,
hi
=
32
,
wi
=
32
;
const
int
no
=
1
,
co
=
64
,
ho
=
32
,
wo
=
32
;
const
int
no
=
1
,
co
=
64
,
ho
=
32
,
wo
=
32
;
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
cn
,
TensorLayout
{{
ni
,
ci
,
hi
,
w
i
},
dtype
::
Float32
()});
cn
,
TensorLayout
{{
ni
,
hi
,
wi
,
c
i
},
dtype
::
Float32
()});
auto
add
=
std
::
make_shared
<
HostTensorND
>
(
auto
add
=
std
::
make_shared
<
HostTensorND
>
(
cn
,
TensorLayout
{{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
()});
cn
,
TensorLayout
{{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
()});
auto
graph
=
ComputingGraph
::
make
();
auto
graph
=
ComputingGraph
::
make
();
auto
x_
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
x
);
auto
x_
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
x
);
auto
add_
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
add
);
auto
add_
=
opr
::
Host2DeviceCopy
::
make
(
*
graph
,
add
);
...
@@ -693,11 +700,11 @@ TEST(TestMagicMindRuntimeOpr, Profiling) {
...
@@ -693,11 +700,11 @@ TEST(TestMagicMindRuntimeOpr, Profiling) {
MMNetwork
network
(
cn
,
magicmind
::
DataType
::
FLOAT32
,
true
);
MMNetwork
network
(
cn
,
magicmind
::
DataType
::
FLOAT32
,
true
);
auto
buf
=
network
.
get_serialized_model
(
false
);
auto
buf
=
network
.
get_serialized_model
(
false
);
const
int
ni
=
8
,
ci
=
64
,
hi
=
32
,
wi
=
32
;
const
int
ni
=
8
,
ci
=
64
,
hi
=
32
,
wi
=
32
;
const
int
no
=
1
,
co
=
64
,
ho
=
32
,
wo
=
32
;
const
int
no
=
8
,
co
=
64
,
ho
=
32
,
wo
=
32
;
HostTensorGenerator
<
dtype
::
Float32
,
RandomDistribution
::
GAUSSIAN
>
gen
(
0
,
1
);
HostTensorGenerator
<
dtype
::
Float32
,
RandomDistribution
::
GAUSSIAN
>
gen
(
0
,
1
);
auto
x
=
gen
({
ni
,
ci
,
hi
,
w
i
},
cn
);
auto
x
=
gen
({
ni
,
hi
,
wi
,
c
i
},
cn
);
auto
add
=
gen
({
no
,
co
,
ho
,
w
o
},
cn
);
auto
add
=
gen
({
no
,
ho
,
wo
,
c
o
},
cn
);
auto
graph
=
ComputingGraph
::
make
();
auto
graph
=
ComputingGraph
::
make
();
GraphProfiler
profiler
{
graph
.
get
()};
GraphProfiler
profiler
{
graph
.
get
()};
...
@@ -708,8 +715,8 @@ TEST(TestMagicMindRuntimeOpr, Profiling) {
...
@@ -708,8 +715,8 @@ TEST(TestMagicMindRuntimeOpr, Profiling) {
auto
out1
=
outs
[
0
];
auto
out1
=
outs
[
0
];
auto
out2
=
outs
[
1
];
auto
out2
=
outs
[
1
];
graph
->
options
().
var_sanity_check_first_run
=
false
;
graph
->
options
().
var_sanity_check_first_run
=
false
;
HostTensorND
o1
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
HostTensorND
o1
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
HostTensorND
o2
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
HostTensorND
o2
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
auto
func
=
graph
->
compile
(
auto
func
=
graph
->
compile
(
{
make_callback_copy
(
out1
,
o1
),
make_callback_copy
(
out2
,
o2
)});
{
make_callback_copy
(
out1
,
o1
),
make_callback_copy
(
out2
,
o2
)});
func
->
execute
();
func
->
execute
();
...
@@ -768,9 +775,9 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
...
@@ -768,9 +775,9 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
add_output_mlu_ptr
,
mlu_deleter
};
add_output_mlu_ptr
,
mlu_deleter
};
network
.
infer_model
(
network
.
infer_model
(
{
conv_input_mlu_ptr
,
add_
out
put_mlu_ptr
},
{
conv_input_mlu_ptr
,
add_
in
put_mlu_ptr
},
{
relu_output_mlu_ptr
,
add_output_mlu_ptr
},
{
relu_output_mlu_ptr
,
add_output_mlu_ptr
},
{
Dims
{{
ni
,
ci
,
hi
,
wi
}},
Dims
{{
no
,
co
,
ho
,
w
o
}}});
{
Dims
{{
ni
,
hi
,
wi
,
ci
}},
Dims
{{
no
,
ho
,
wo
,
c
o
}}});
// result memory copy cnml->cpu
// result memory copy cnml->cpu
// memory copy cpu->mlu
// memory copy cpu->mlu
...
@@ -784,9 +791,9 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
...
@@ -784,9 +791,9 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
auto
cn_cpu
=
CompNode
::
load
(
"cpu0"
);
auto
cn_cpu
=
CompNode
::
load
(
"cpu0"
);
auto
buf
=
network
.
get_serialized_model
(
false
);
auto
buf
=
network
.
get_serialized_model
(
false
);
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
auto
x
=
std
::
make_shared
<
HostTensorND
>
(
cn_cpu
,
TensorLayout
{{
ni
,
ci
,
hi
,
w
i
},
dtype
::
Float32
()});
cn_cpu
,
TensorLayout
{{
ni
,
hi
,
wi
,
c
i
},
dtype
::
Float32
()});
auto
add
=
std
::
make_shared
<
HostTensorND
>
(
auto
add
=
std
::
make_shared
<
HostTensorND
>
(
cn_cpu
,
TensorLayout
{{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
()});
cn_cpu
,
TensorLayout
{{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
()});
std
::
memcpy
(
std
::
memcpy
(
reinterpret_cast
<
void
*>
(
x
->
ptr
<
dt_float32
>
()),
conv_input_cpu_data
.
data
(),
reinterpret_cast
<
void
*>
(
x
->
ptr
<
dt_float32
>
()),
conv_input_cpu_data
.
data
(),
conv_input_count
*
sizeof
(
float
));
conv_input_count
*
sizeof
(
float
));
...
@@ -802,13 +809,13 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
...
@@ -802,13 +809,13 @@ TEST(TestMagicMindRuntimeOpr, CrossCNCopy) {
reinterpret_cast
<
const
void
*>
(
buf
.
data
()),
buf
.
size
(),
{
x_
,
add_
});
reinterpret_cast
<
const
void
*>
(
buf
.
data
()),
buf
.
size
(),
{
x_
,
add_
});
auto
out1
=
outs
[
0
];
auto
out1
=
outs
[
0
];
auto
out2
=
outs
[
1
];
auto
out2
=
outs
[
1
];
HostTensorND
o1
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
HostTensorND
o1
(
CompNode
::
default_cpu
(),
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
HostTensorND
o2
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
HostTensorND
o2
(
CompNode
::
default_cpu
(),
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
auto
func
=
graph
->
compile
(
auto
func
=
graph
->
compile
(
{
make_callback_copy
(
out1
,
o1
),
make_callback_copy
(
out2
,
o2
)});
{
make_callback_copy
(
out1
,
o1
),
make_callback_copy
(
out2
,
o2
)});
func
->
execute
();
func
->
execute
();
HostTensorND
o1_mm
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
()),
HostTensorND
o1_mm
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
()),
o2_mm
(
cn
,
{
no
,
co
,
ho
,
w
o
},
dtype
::
Float32
());
o2_mm
(
cn
,
{
no
,
ho
,
wo
,
c
o
},
dtype
::
Float32
());
std
::
memcpy
(
std
::
memcpy
(
o1_mm
.
ptr
<
float
>
(),
relu_output_cpu_data
.
data
(),
o1_mm
.
ptr
<
float
>
(),
relu_output_cpu_data
.
data
(),
relu_output_count
*
sizeof
(
float
));
relu_output_count
*
sizeof
(
float
));
...
...
src/core/include/megbrain/ir/ops.td
浏览文件 @
f398c8e6
...
@@ -388,6 +388,13 @@ def CambriconRuntime: MgbHashableOp<"CambriconRuntime"> {
...
@@ -388,6 +388,13 @@ def CambriconRuntime: MgbHashableOp<"CambriconRuntime"> {
);
);
}
}
// Hashable op definition named "MagicMindRuntime".
// NOTE(review): presumably this is what backs builtin.MagicMindRuntime(data, len(data))
// on the Python side -- confirm against the op code generator.
def MagicMindRuntime: MgbHashableOp<"MagicMindRuntime"> {
let extraArguments = (ins
// buf: string attribute; the C++ side reads it via op.buf.c_str().
MgbStringAttr:$buf,
// buf_size: size attribute passed alongside buf to the operator factory.
MgbSizeTAddr:$buf_size
);
}
def CvtColor: MgbHashableOp<"CvtColor", [CvtColorParam]>;
def CvtColor: MgbHashableOp<"CvtColor", [CvtColorParam]>;
def CheckNonFinite: MgbHashableOp<"CheckNonFinite", [EmptyParam]>;
def CheckNonFinite: MgbHashableOp<"CheckNonFinite", [EmptyParam]>;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录