Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
11556cba
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
11556cba
编写于
10月 27, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(megbrain): add mc20 bazel
GitOrigin-RevId: 96d57656e49e3ef72b26a3a9a464546b23c3fca8
上级
d2c37fe4
变更
5
显示空白变更内容
内联
并排
Showing
5 changed files
with
0 additions
and
580 deletions
+0
-580
cmake/mc20.cmake
cmake/mc20.cmake
+0
-29
src/opr/impl/mc20_runtime_op.cpp
src/opr/impl/mc20_runtime_op.cpp
+0
-379
src/opr/impl/mc20_runtime_op.oprdecl
src/opr/impl/mc20_runtime_op.oprdecl
+0
-17
src/opr/impl/mc20_runtime_op.sereg.h
src/opr/impl/mc20_runtime_op.sereg.h
+0
-72
src/opr/include/megbrain/opr/mc20_runtime_op.h
src/opr/include/megbrain/opr/mc20_runtime_op.h
+0
-83
未找到文件。
cmake/mc20.cmake
已删除
100644 → 0
浏览文件 @
d2c37fe4
# Locate the MC20 (AXERA NPU) SDK root: it is identified by the presence of
# the public interpreter header. Either the bundled third_party copy or an
# externally installed SDK pointed at by $MC20DIR is accepted.
find_path(
  MC20_ROOT_DIR
  include/ax_interpreter_external_api.h
  PATHS ${PROJECT_SOURCE_DIR}/third_party/mc20/ $ENV{MC20DIR})
# find_path stores "<var>-NOTFOUND" on failure; abort configuration early
# rather than failing later at link time.
if(${MC20_ROOT_DIR} STREQUAL "MC20_ROOT_DIR-NOTFOUND")
  message(FATAL_ERROR "Can not find MC20")
endif()
message(STATUS "Build with MC20 in ${MC20_ROOT_DIR}")

# Resolve the include directory separately so an installed-to-prefix layout
# (${INCLUDE_INSTALL_DIR}) also works.
find_path(
  MC20_INCLUDE_DIR
  ax_interpreter_external_api.h
  PATHS ${MC20_ROOT_DIR}/include ${INCLUDE_INSTALL_DIR})

# Imported interface target that downstream targets link against; it carries
# only usage requirements (library path + headers), no build steps.
add_library(libmc20 INTERFACE IMPORTED)

# NOTE(review): only the x86 static library name is searched here — presumably
# cross builds use a different mechanism; verify before reuse.
find_library(
  MC20_LIBRARY
  NAMES libax_interpreter_external.x86.a
  PATHS ${MC20_ROOT_DIR}/lib/)
if(${MC20_LIBRARY} STREQUAL "MC20_LIBRARY-NOTFOUND")
  message(FATAL_ERROR "Can not find MC20 library")
endif()

target_link_libraries(libmc20 INTERFACE ${MC20_LIBRARY})
target_include_directories(libmc20 INTERFACE ${MC20_INCLUDE_DIR})
src/opr/impl/mc20_runtime_op.cpp
已删除
100644 → 0
浏览文件 @
d2c37fe4
/**
* \file src/opr/impl/mc20_runtime_op.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megbrain/opr/mc20_runtime_op.h"
#include "megbrain/common.h"
#include "megbrain/graph/event.h"
#include "megdnn/dtype.h"
#include <memory>
#if MGB_MC20
using
namespace
mgb
;
using
namespace
opr
;
namespace
{
//! Convert an MC20 SDK tensor meta description into a MegBrain TensorShape.
//! Copies nShapeNDim extents out of the SDK's pShape array.
TensorShape mc20_shape_to_mgb_shape(AX_NPU_SDK_EX_TENSOR_META_T tensor_meta) {
    TensorShape result;
    result.ndim = tensor_meta.nShapeNDim;
    size_t axis = 0;
    while (axis < result.ndim) {
        result[axis] = tensor_meta.pShape[axis];
        ++axis;
    }
    return result;
}
//! Map an MC20 SDK tensor dtype enum onto the corresponding MegBrain DType.
//! Throws MegBrainError for any dtype that has no MegBrain equivalent.
DType mc20_dtype_to_mgb_dtype(AX_NPU_SDK_EX_ADV_TENSOR_DTYPE data_type) {
    if (data_type == AX_NPU_TDT_UINT8)
        return dtype::Uint8();
    if (data_type == AX_NPU_TDT_FLOAT32)
        return dtype::Float32();
    if (data_type == AX_NPU_TDT_INT16)
        return dtype::Int16();
    if (data_type == AX_NPU_TDT_INT32)
        return dtype::Int32();
    mgb_throw(
            MegBrainError, "MC20DataType %d is not supported by MegBrain.",
            static_cast<int>(data_type));
}
};
// namespace
// Out-of-class definition required (pre-C++17 ODR rules) for the in-class
// constexpr static member declared in the header.
constexpr AX_NPU_SDK_EX_HANDLE_T MC20RuntimeOpr::INVALID_MODEL_HANDLE;

/* ====================== MC20RuntimeOpr ==================== */
// Registers MegBrain dynamic-type (RTTI) metadata for this operator class.
MGB_DYN_TYPE_OBJ_FINAL_IMPL(MC20RuntimeOpr);
/**
 * \brief Construct the runtime operator from a serialized model buffer.
 *
 * If \p model_handle is INVALID_MODEL_HANDLE a new SDK handle is created from
 * \p buf and this operator becomes its owner (released in the destructor);
 * otherwise the externally supplied handle is borrowed.
 *
 * Fixes vs. original: the single-output branch was a byte-for-byte copy of
 * one iteration of the multi-output loop and has been folded into it, and an
 * integral division was wrapped in `reinterpret_cast<size_t>` (a no-op
 * identity cast at best) — replaced with `static_cast`.
 */
MC20RuntimeOpr::MC20RuntimeOpr(
        SharedBuffer buf, AX_NPU_SDK_EX_HANDLE_T model_handle,
        const VarNodeArray& inputs, const OperatorNodeConfig& config)
        : Super(inputs[0]->owner_graph(), config, "mc20_runtime", inputs),
          m_buffer{std::move(buf)},
          m_model_handle(model_handle) {
    mgb_assert(
            inputs[0]->comp_node().device_type() == CompNode::DeviceType::MC20,
            "MC20RuntimeOpr can only be used on mc20 comp node; "
            "got %s",
            inputs[0]->comp_node().to_string().c_str());
    for (auto i : inputs) {
        add_input({i});
    }
    //! no handle supplied: create one from the serialized model and remember
    //! to destroy it when this operator dies
    if (m_model_handle == INVALID_MODEL_HANDLE) {
        MGB_MC20_CHECK(AX_NPU_SDK_EX_Create_handle(
                &m_model_handle, m_buffer.data(), m_buffer.size()));
        m_is_model_holder = true;
    }
    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
    size_t nr_outputs = io_info->nOutputSize;
    bool has_workspace = false;
    //! one output var per model output, named after the tensor in the model;
    //! a "virtual" (cpu-tail) output requires inner workspace memory
    for (size_t i = 0; i < nr_outputs; ++i) {
        const auto& tensor_meta = *(io_info->pOutputs[i].pTensorMeta);
        add_output(std::string(reinterpret_cast<char*>(tensor_meta.pName)));
        if (tensor_meta.eMemoryType == AX_NPU_MT_VIRTUAL) {
            mgb_assert(tensor_meta.nInnerSize > 0);
            has_workspace = true;
        }
    }
    mgb_assert(has_workspace, "Currently only support model with cpu tail");
    //! \warning There is no interface in MC20 to get the batch size of
    //! model.MC20 supports multi-batch by changing the input of n-batch to n
    //! 1-batch input.
    mgb_assert(
            io_info->nInputSize % inputs.size() == 0,
            "The number of inputs in the neu model should be multiple of "
            "the number of inputs in megbrain, but got %zu(neu model) vs "
            "%zu(mgb model)",
            io_info->nInputSize, inputs.size());
    m_model_batch = static_cast<size_t>(io_info->nInputSize / inputs.size());
    // hash the buffer address into the operator's equivalence key so graph
    // deduplication distinguishes oprs loaded from different models
    add_equivalence_component<mgb::ScalarHash<const void*>>(m_buffer.data());
    cg::add_workspace_output(this);
}
MC20RuntimeOpr::~MC20RuntimeOpr() {
    // Only the operator that created the SDK handle may destroy it; handles
    // injected from outside must outlive this operator.
    if (!m_is_model_holder)
        return;
    MGB_MC20_CHECK(AX_NPU_SDK_EX_Destroy_handle(m_model_handle));
}
/**
 * \brief Run the loaded MC20 model synchronously over the whole mini-batch.
 *
 * The model itself only accepts m_model_batch worth of samples per SDK call,
 * so the mgb batch is processed in chunks of m_model_batch, re-pointing the
 * SDK IO buffers into the mgb tensors at the right batch offset each round.
 */
void MC20RuntimeOpr::execute_mc20() {
    auto&& mc20_env =
            CompNodeEnv::from_comp_node(input(0)->comp_node()).mc20_env();
    mc20_env.activate();
    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
    AX_NPU_SDK_EX_IO_T npu_io;
    memset(&npu_io, 0, sizeof(npu_io));
    // leading dim of input 0 is treated as the batch dimension
    size_t batch_size = input(0)->dev_tensor().layout().shape[0];
    for (size_t batch_idx = 0; batch_idx < batch_size;
         batch_idx += m_model_batch) {
        //! prepare input
        npu_io.nInputSize = io_info->nInputSize;
        auto inputs = std::make_unique<AX_NPU_SDK_EX_BUF_T[]>(npu_io.nInputSize);
        npu_io.pInputs = inputs.get();
        for (size_t i = 0; i < npu_io.nInputSize; i++) {
            // get input addr info; m_model_batch consecutive SDK inputs map
            // onto one mgb input var (see constructor)
            // NOTE(review): reinterpret_cast on an integral expression is at
            // best an identity cast here — static_cast (or no cast) intended.
            size_t inp_idx = reinterpret_cast<size_t>(i / m_model_batch);
            AX_VOID* p_virtual_addr = input(inp_idx)->dev_tensor().raw_ptr();
            AX_U64 phy_addr =
                    MC20MemoryManager::Instance().get_phyaddr(p_virtual_addr);
            auto nr_bytes_per_batch =
                    input(inp_idx)->layout().span().dist_byte() / batch_size;
            // add batch offset (both to virtual and physical address)
            p_virtual_addr = reinterpret_cast<AX_VOID*>(
                    reinterpret_cast<AX_U64>(p_virtual_addr) +
                    nr_bytes_per_batch * (batch_idx + i % m_model_batch));
            phy_addr += nr_bytes_per_batch * (batch_idx + i % m_model_batch);
            MGB_MC20_CHECK(AX_NPU_SDK_EX_ADV_Make_io_buffer(
                    phy_addr, p_virtual_addr, nr_bytes_per_batch, phy_addr,
                    p_virtual_addr, nr_bytes_per_batch, &npu_io.pInputs[i]));
        }
        //! prepare output
        npu_io.nOutputSize = io_info->nOutputSize;
        auto outputs =
                std::make_unique<AX_NPU_SDK_EX_BUF_T[]>(npu_io.nOutputSize);
        npu_io.pOutputs = outputs.get();
        AX_U32 offset = 0;
        AX_VOID* inner_virtual_addr_start = nullptr;
        AX_U64 inner_phy_addr_start = 0;
        // get inner addr from workspace: output(nOutputSize) is the extra
        // workspace var appended by cg::add_workspace_output
        inner_virtual_addr_start =
                output(npu_io.nOutputSize)->dev_tensor().raw_ptr();
        inner_phy_addr_start = MC20MemoryManager::Instance().get_phyaddr(
                inner_virtual_addr_start);
        for (size_t i = 0; i < npu_io.nOutputSize; i++) {
            // get output addr info
            AX_VOID* p_virtual_addr = output(i)->dev_tensor().raw_ptr();
            // NOTE(review): output phy_addr starts at 0 and only the batch
            // offset is added — unlike inputs, the real physical address is
            // never looked up. Presumably the SDK ignores it for cpu-tail
            // outputs; confirm for the AX_NPU_MT_PHYSICAL branch.
            AX_U64 phy_addr = 0;
            auto nr_bytes_per_batch =
                    output(i)->layout().span().dist_byte() / batch_size;
            // add batch offset
            p_virtual_addr = reinterpret_cast<AX_VOID*>(
                    reinterpret_cast<AX_U64>(p_virtual_addr) +
                    nr_bytes_per_batch * batch_idx);
            phy_addr += nr_bytes_per_batch * batch_idx;
            const auto& tensor_meta = *(io_info->pOutputs[i].pTensorMeta);
            if (tensor_meta.eMemoryType == AX_NPU_MT_PHYSICAL) {
                // NPU writes the output directly into the mgb tensor
                MGB_MC20_CHECK(AX_NPU_SDK_EX_ADV_Make_io_buffer(
                        phy_addr, p_virtual_addr, nr_bytes_per_batch, phy_addr,
                        p_virtual_addr, nr_bytes_per_batch,
                        &npu_io.pOutputs[i]));
            } else if (tensor_meta.eMemoryType == AX_NPU_MT_VIRTUAL) {
                // cpu-tail output: SDK needs scratch ("inner") memory, carved
                // sequentially out of the workspace tensor
                auto p_inner_virtual_addr = reinterpret_cast<AX_VOID*>(
                        reinterpret_cast<AX_U64>(inner_virtual_addr_start) +
                        offset);
                auto innerphy_addr = inner_phy_addr_start + offset;
                MGB_MC20_CHECK(AX_NPU_SDK_EX_ADV_Make_io_buffer(
                        phy_addr, p_virtual_addr, nr_bytes_per_batch,
                        innerphy_addr, p_inner_virtual_addr,
                        tensor_meta.nInnerSize, &npu_io.pOutputs[i]));
                offset += tensor_meta.nInnerSize;
            }
        }
        // blocking inference for this chunk of the batch
        MGB_MC20_CHECK(AX_NPU_SDK_EX_Run_task_sync(m_model_handle, &npu_io));
    }
}
void
MC20RuntimeOpr
::
init_output_comp_node
()
{
//! set output to cpu compnode if has cpu tail
const
AX_NPU_SDK_EX_ADV_IO_INFO_T
*
io_info
=
AX_NPU_SDK_EX_ADV_Get_io_info
(
m_model_handle
);
CompNode
input_cn
;
for
(
auto
&&
i
:
input
())
{
if
(
!
input_cn
.
valid
())
{
input_cn
=
i
->
comp_node
();
}
else
{
mgb_assert
(
input_cn
.
mem_node
()
==
i
->
comp_node
().
mem_node
(),
"opr %s{%s} requires all input to be on the same memory "
"node expect=%s cur_var=%s cur_cn=%s"
,
this
->
cname
(),
this
->
dyn_typeinfo
()
->
name
,
input_cn
.
to_string
().
c_str
(),
i
->
cname
(),
i
->
comp_node
().
to_string
().
c_str
());
}
}
for
(
size_t
i
=
0
;
i
<
io_info
->
nOutputSize
;
i
++
)
{
//! compnode of the var should be default_cpu as the output will be
//! proxy to user
output
(
i
)
->
comp_node
(
CompNode
::
default_cpu
());
}
//! the last output is workspace, which should be the same as input
output
(
io_info
->
nOutputSize
)
->
comp_node
(
input_cn
);
}
MC20RuntimeOpr::NodeProp* MC20RuntimeOpr::do_make_node_prop() const {
    // Outputs are placed on default_cpu while inputs live on the MC20 comp
    // node, so the graph must allow cross-comp-node memory access.
    auto prop = Super::do_make_node_prop();
    prop->add_flag(NodeProp::Flag::CROSS_COMP_NODE_MEMORY);
    return prop;
}
/**
 * \brief Dispatch the MC20 inference onto the comp node of output(0) and
 * notify profiling listeners.
 *
 * BeforeKernel/AfterKernel events are signalled around the actual kernel,
 * and additionally (as an empty pair) on every comp node this opr touches so
 * that per-comp-node waiters are released.
 */
void MC20RuntimeOpr::do_execute(ExecEnv& env) {
    CompNode cn = output(0)->comp_node();
    auto runner = [this, cn]() {
        this->owner_graph()->event().signal_inplace<cg::event::BeforeKernel>(
                this, cn);
        cn.activate();
        execute_mc20();
        this->owner_graph()->event().signal_inplace<cg::event::AfterKernel>(
                this, cn);
    };
    env.dispatch_on_comp_node(cn, runner);
    // Send BeforeKernel/AfterKernel event on every different comp_node
    // NOTE(review): the loop variable below shadows the outer `cn`, and the
    // set may include output(0)'s comp node again, signalling a second
    // (empty) Before/After pair on it — presumably harmless; confirm.
    ThinHashSet<mgb::CompNode> st = cg::get_opr_comp_node_set(this);
    for (auto cn : st) {
        auto send_event = [this, cn]() {
            this->owner_graph()
                    ->event()
                    .signal_inplace<cg::event::BeforeKernel>(this, cn);
            this->owner_graph()
                    ->event()
                    .signal_inplace<cg::event::AfterKernel>(this, cn);
        };
        env.dispatch_on_comp_node(cn, send_event);
    }
}
//! Output comp nodes are fixed at init time (default_cpu + workspace on the
//! input node); any later stream change is a graph-level logic error.
void MC20RuntimeOpr::on_output_comp_node_stream_changed() {
    mgb_throw(SystemError, "comp node of output should not change");
}
/**
 * \brief Infer output shapes from input shapes and the model's IO metadata.
 *
 * Checks every mgb input against the model's declared shape (allowing the
 * batch dim to be any multiple of model_batch * m_model_batch), then scales
 * each model output's batch dim accordingly. The final out_shape entry is the
 * workspace, sized as the sum of all "virtual" (cpu-tail) inner buffers.
 *
 * Fixes vs. original: `input_batch` is a size_t but was printed with %d
 * (undefined behavior in varargs) — now %zu; the pointless
 * `reinterpret_cast<size_t>` around an integral division was removed.
 */
void MC20RuntimeOpr::get_output_var_shape(
        const TensorShapeArray& inp_shape, TensorShapeArray& out_shape) const {
    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
    size_t nr_inputs = io_info->nInputSize;
    for (size_t i = 0; i < nr_inputs; ++i) {
        const auto& tensor_meta = *(io_info->pInputs[i].pTensorMeta);
        auto model_shape = mc20_shape_to_mgb_shape(tensor_meta);
        // m_model_batch consecutive model inputs map onto one mgb input
        size_t inp_idx = i / m_model_batch;
        // enable mutibatch
        mgb_assert(
                inp_shape[inp_idx][0] % model_shape[0] == 0 &&
                        (inp_shape[inp_idx][0] / model_shape[0]) %
                                        m_model_batch ==
                                0,
                "input %zu batch is %zu, while model's input batch is %zu", i,
                inp_shape[inp_idx][0], model_shape[0]);
        // all non-batch dims must match exactly
        model_shape[0] = inp_shape[inp_idx][0];
        mgb_assert(
                model_shape.eq_shape(inp_shape[inp_idx]),
                "shape mismatch of input %zu, expected: %s got: %s", i,
                model_shape.to_string().c_str(),
                inp_shape[inp_idx].to_string().c_str());
    }
    size_t input_batch = (io_info->pInputs[0].pTensorMeta)->pShape[0];
    //! \warning mc20 sdk implement multi-batch by breaking an n-batch input up
    //! into n 1-batch inputs
    mgb_assert(
            input_batch == 1, "input batch: %zu, net's input batch: 1",
            input_batch);
    AX_U32 workspace_size = 0;
    for (size_t i = 0; i < io_info->nOutputSize; ++i) {
        const auto& tensor_meta = *(io_info->pOutputs[i].pTensorMeta);
        out_shape[i] = mc20_shape_to_mgb_shape(tensor_meta);
        // enable mutibatch: scale the model's per-run batch up to the mgb one
        out_shape[i][0] = out_shape[i][0] * inp_shape[0][0] / input_batch /
                          m_model_batch;
        if (tensor_meta.eMemoryType == AX_NPU_MT_VIRTUAL) {
            workspace_size += tensor_meta.nInnerSize;
        }
    }
    // trailing entry is the workspace appended by cg::add_workspace_output
    out_shape.back() = {workspace_size};
}
void MC20RuntimeOpr::add_input_layout_constraint() {
    //! default contiguous: the MC20 SDK consumes flat buffers, so every
    //! input tensor must be laid out contiguously
    for (auto var : input()) {
        var->add_layout_constraint_contiguous();
    }
}
/**
 * \brief Validate input dtypes against the model and assign output dtypes.
 *
 * Each mgb input's dtype must match the dtype the model declares for the
 * corresponding SDK input; output vars that do not yet have a dtype get the
 * model-declared one.
 *
 * Fix vs. original: the integral division was wrapped in
 * `reinterpret_cast<size_t>` (an identity cast at best) — removed.
 */
void MC20RuntimeOpr::init_output_dtype() {
    DType dt_mc20, dt_input;
    const AX_NPU_SDK_EX_ADV_IO_INFO_T* io_info =
            AX_NPU_SDK_EX_ADV_Get_io_info(m_model_handle);
    for (size_t i = 0; i < io_info->nInputSize; ++i) {
        dt_mc20 = mc20_dtype_to_mgb_dtype(io_info->pInputs[i].eDType);
        // m_model_batch consecutive model inputs map onto one mgb input
        size_t inp_idx = i / m_model_batch;
        dt_input = input(inp_idx)->dtype();
        mgb_assert(
                dt_mc20.valid() && dt_input.valid() &&
                        dt_mc20.enumv() == dt_input.enumv(),
                "dtype mismatch of input %zu: expected %s, "
                "got %s",
                i, dt_mc20.name(), dt_input.name());
    }
    for (size_t i = 0; i < io_info->nOutputSize; ++i) {
        dt_mc20 = mc20_dtype_to_mgb_dtype(io_info->pOutputs[i].eDType);
        mgb_assert(
                dt_mc20.valid(),
                "output dtype checking failed: invalid dtype returned.");
        // only fill in dtypes that have not been set elsewhere
        if (!output(i)->dtype().valid())
            output(i)->dtype(dt_mc20);
    }
}
/**
 * \brief Create the operator from a serialized model buffer; the SDK handle
 * is created internally and owned by the operator.
 *
 * Fix vs. original: the body duplicated the three-argument overload verbatim
 * (with INVALID_MODEL_HANDLE hard-coded) — it now delegates to it, so the
 * insert/pop-workspace logic lives in exactly one place.
 */
SymbolVarArray MC20RuntimeOpr::make(
        SharedBuffer buf, const SymbolVarArray& src,
        const OperatorNodeConfig& config) {
    return make(std::move(buf), INVALID_MODEL_HANDLE, src, config);
}
/**
 * \brief Create the operator from a raw (pointer, size) model blob.
 *
 * The blob is copied into a self-owned SharedBuffer so its lifetime is tied
 * to the operator rather than to the caller's memory.
 */
SymbolVarArray MC20RuntimeOpr::make(
        const void* buf, size_t size, const SymbolVarArray& src,
        const OperatorNodeConfig& config) {
    mgb_throw_if(
            !CompNode::get_device_count(CompNode::DeviceType::MC20),
            SystemError,
            "can not create MC20RuntimeOpr when mc20 is not "
            "available");
    uint8_t* storage = new uint8_t[size];
    std::shared_ptr<uint8_t> owner{storage, [](uint8_t* p) { delete[] p; }};
    memcpy(storage, buf, size);
    SharedBuffer buffer{std::move(owner), size};
    return make(std::move(buffer), src, config);
}
/**
 * \brief Create the operator around an existing SDK model handle.
 *
 * The handle is borrowed (not destroyed by the operator). Returns one
 * SymbolVar per model output; the trailing workspace var is stripped.
 */
SymbolVarArray MC20RuntimeOpr::make(
        SharedBuffer buf, AX_NPU_SDK_EX_HANDLE_T model_handle,
        const SymbolVarArray& src, const OperatorNodeConfig& config) {
    VarNodeArray var_node_array = cg::to_var_node_array(src);
    auto opr_holder = std::make_unique<MC20RuntimeOpr>(
            std::move(buf), model_handle, var_node_array, config);
    auto* graph = src[0].node()->owner_graph();
    SymbolVarArray result = cg::to_symbol_var_array(
            graph->insert_opr(std::move(opr_holder))->output());
    // remove workspace
    result.pop_back();
    return result;
}
#endif // MGB_MC20
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/opr/impl/mc20_runtime_op.oprdecl
已删除
100644 → 0
浏览文件 @
d2c37fe4
# Declares the 'mc20_runtime' raw operator for MegBrain's opr-decl code
# generator: the generated Python wrapper validates the model bytes and
# forwards to the C++ implementation via _mgb._Opr.mc20_runtime.
decl_raw_opr(
    'mc20_runtime',
    desc='create an operator that could load and run mc20 model',
    inputs=[
        Doc('inputs', 'input vars', 'list of :class:`.SymbolVar`'),
        Doc('data_bytes', 'serialized mc20 model'),
    ],
    # body lines are emitted verbatim into the generated wrapper function
    body=[
        'assert isinstance(data_bytes, bytes), '
        '"data must be bytes; got {}".format(type(data_bytes))',
        'output = _mgb._Opr.mc20_runtime(inputs, data_bytes, config)',
        # the opr always returns a list, even for a single output var
        'cvt_result_kwargs["explode_single"] = False',
    ],
)
# vim: ft=python
src/opr/impl/mc20_runtime_op.sereg.h
已删除
100644 → 0
浏览文件 @
d2c37fe4
/**
* \file src/opr/impl/mc20_runtime_op.sereg.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "megbrain/opr/mc20_runtime_op.h"
#include "megbrain/serialization/sereg.h"
#if MGB_MC20
namespace
mgb
{
using
MC20RuntimeOpr
=
opr
::
MC20RuntimeOpr
;
namespace
serialization
{
//! Serialization support for MC20RuntimeOpr: the on-disk format is two
//! length-prefixed byte blobs, the model buffer followed by the opr name.
//! (Do not reorder the dump/load calls — the byte order is the format.)
template <>
struct OprLoadDumpImpl<MC20RuntimeOpr, 0> {
    //! Write the operator's model buffer and its name to the dump context.
    static void dump(OprDumpContext& ctx, const cg::OperatorNodeBase& opr_) {
        auto&& opr = opr_.cast_final_safe<opr::MC20RuntimeOpr>();
        auto&& buf = opr.buffer();
        auto&& name = opr.name();
        ctx.dump_buf_with_len(buf.data(), buf.size());
        ctx.dump_buf_with_len(name.c_str(), name.size());
    }

    //! Rebuild the operator from the two blobs written by dump(), restoring
    //! the saved name into the node config.
    static cg::OperatorNodeBase* load(
            OprLoadContext& ctx, const cg::VarNodeArray& inputs,
            const OperatorNodeConfig& config) {
        // model creation requires the target comp node to be active
        inputs.at(0)->comp_node().activate();
        auto buf = ctx.load_shared_buf_with_len();
        auto name = ctx.load_shared_buf_with_len();
        std::string c_name(
                reinterpret_cast<const char*>(name.data()), name.size());
        // NOTE(review): const_cast mutates the caller-owned config to inject
        // the saved name — works because the loader owns the config here,
        // but verify before reusing this pattern.
        OperatorNodeConfig& c_config = const_cast<OperatorNodeConfig&>(config);
        c_config.name(c_name);
        return opr::MC20RuntimeOpr::make(
                       std::move(buf), cg::to_symbol_var_array(inputs),
                       c_config)
                .at(0)
                .node()
                ->owner_opr();
    }
};
}
// namespace serialization
namespace
opr
{
//! Shallow-copy hook: rebuild the operator on new inputs while sharing both
//! the model buffer and the already-created MC20 handle of the source opr
//! (the copy therefore does not own the handle).
cg::OperatorNodeBase* opr_shallow_copy_mc20_runtime_opr(
        const serialization::OprShallowCopyContext& ctx,
        const cg::OperatorNodeBase& opr_, const VarNodeArray& inputs,
        const OperatorNodeConfig& config) {
    MGB_MARK_USED_VAR(ctx);
    auto&& src_opr = opr_.cast_final_safe<MC20RuntimeOpr>();
    SymbolVarArray out_vars = MC20RuntimeOpr::make(
            src_opr.buffer(), src_opr.model_handle(),
            cg::to_symbol_var_array(inputs), config);
    return out_vars.at(0).node()->owner_opr();
}
// Register the operator with the serializer (version 0) and hook up the
// shallow-copy implementation defined above.
MGB_SEREG_OPR(MC20RuntimeOpr, 0);
MGB_REG_OPR_SHALLOW_COPY(MC20RuntimeOpr, opr_shallow_copy_mc20_runtime_opr);
}
// namespace opr
}
// namespace mgb
#endif
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
src/opr/include/megbrain/opr/mc20_runtime_op.h
已删除
100644 → 0
浏览文件 @
d2c37fe4
/**
* \file src/opr/include/megbrain/opr/mc20_runtime_op.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#pragma once
#include <memory>
#include "megbrain/comp_node_env.h"
#include "megbrain/graph.h"
#include "megbrain/graph/operator_node.h"
#include "megbrain/serialization/file.h"
#include "megdnn/thin/function.h"
#if MGB_MC20
#include "megbrain/mc20/mc20_memory_manager.h"
namespace
mgb
{
namespace
opr
{
//! Operator that loads a serialized MC20 (AXERA NPU) model and runs it as a
//! node in a MegBrain graph. Outputs are one var per model output plus a
//! trailing workspace var (stripped by make()).
MGB_DEFINE_OPR_CLASS(
        MC20RuntimeOpr, cg::OutshapePureByInshapeOpr<cg::OperatorNodeBase>) // {
public:
    using SharedBuffer = mgb::serialization::SharedBuffer;

    //! dispatch the NPU inference (see impl for event signalling details)
    void do_execute(ExecEnv& env) override;
    //! infer output shapes from input shapes + model IO metadata
    void get_output_var_shape(
            const TensorShapeArray& inp_shape,
            TensorShapeArray& out_shape) const override;
    //! all inputs must be contiguous for the SDK
    void add_input_layout_constraint() override;
    //! validate input dtypes against the model; set output dtypes
    void init_output_dtype() override;
    //! outputs on default_cpu, workspace on the input comp node
    void init_output_comp_node() override;
    //! output comp nodes are fixed; throws on change
    void on_output_comp_node_stream_changed() override;

    /**
     * \brief create MC20RuntimeOpr with buf
     *
     * Passing INVALID_MODEL_HANDLE makes the opr create (and own) the SDK
     * handle from buf; otherwise the given handle is borrowed.
     */
    MC20RuntimeOpr(
            SharedBuffer buf, AX_NPU_SDK_EX_HANDLE_T m_model_handle,
            const VarNodeArray& inputs, const OperatorNodeConfig& config);
    ~MC20RuntimeOpr();

    //! serialized model blob backing this operator
    const SharedBuffer& buffer() const { return m_buffer; }
    //! SDK handle; may be shared with other oprs via shallow copy
    AX_NPU_SDK_EX_HANDLE_T model_handle() const { return m_model_handle; }

    //! create from buffer; the opr creates and owns the SDK handle
    static SymbolVarArray make(
            SharedBuffer buf, const SymbolVarArray& src,
            const OperatorNodeConfig& config = {});
    //! create from a raw blob, which is copied into an owned buffer
    static SymbolVarArray make(
            const void* buf, size_t size, const SymbolVarArray& src,
            const OperatorNodeConfig& config = {});
    //! create around an existing (borrowed) SDK handle
    static SymbolVarArray make(
            SharedBuffer buf, AX_NPU_SDK_EX_HANDLE_T model_handle,
            const SymbolVarArray& src, const OperatorNodeConfig& config = {});

private:
    NodeProp* do_make_node_prop() const override;
    //! run the model over the whole batch, m_model_batch samples per SDK call
    void execute_mc20();

    //! model inputs per mgb input (model nInputSize / mgb input count)
    size_t m_model_batch;
    SharedBuffer m_buffer;
    constexpr static AX_NPU_SDK_EX_HANDLE_T INVALID_MODEL_HANDLE = nullptr;
    AX_NPU_SDK_EX_HANDLE_T m_model_handle = INVALID_MODEL_HANDLE;
    //! if set true, it will release model
    bool m_is_model_holder = false;
};
// namespace opr
}
// namespace opr
}
// namespace mgb
#endif // MGB_MC20
// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录