Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
90e7cb00
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
396
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
90e7cb00
编写于
11月 26, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(externcopr/lar): imp lar run extern c opr with dynamic param
GitOrigin-RevId: 81c147aa6cbacf0f737d880f15e3f1d7efd62ba6
上级
dbb64b46
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
153 addition
and
14 deletion
+153
-14
sdk/load-and-run/src/mgblar.cpp
sdk/load-and-run/src/mgblar.cpp
+153
-14
未找到文件。
sdk/load-and-run/src/mgblar.cpp
浏览文件 @
90e7cb00
...
...
@@ -14,29 +14,31 @@
#include "./json_loader.h"
#include "./npy.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/graph/extern_copr_api.h"
#include "megbrain/opr/dnn/convolution.h"
#include "megbrain/utils/debug.h"
#include "megbrain/serialization/serializer.h"
#include "megbrain/serialization/extern_c_opr.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/utility.h"
#include "megbrain/plugin/cpu_dispatch_checker.h"
#include "megbrain/plugin/num_range_checker.h"
#include "megbrain/plugin/opr_io_dump.h"
#include "megbrain/plugin/profiler.h"
#include "megbrain/plugin/num_range_checker.h"
#include "megbrain/plugin/cpu_dispatch_checker.h"
#include "megbrain/plugin/var_value_checker.h"
#include "megbrain/opr/io.h"
#include "megbrain/opr/utility.h"
#include "megbrain/gopt/inference.h"
#include "megbrain/comp_node_env.h"
#include "megbrain/serialization/extern_c_opr.h"
#include "megbrain/serialization/serializer.h"
#include "megbrain/utils/debug.h"
#include "megbrain/system.h"
#include "megbrain/version.h"
#include "megdnn/version.h"
#include <cstdlib>
#include <cstring>
#include <cctype>
#include <cerrno>
#include <cstdio>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <numeric>
#include <sstream>
...
...
@@ -155,6 +157,9 @@ R"__usage__(
--c-opr-lib <path>
Load external operator library. It must implement MGB_C_OPR_INIT_FUNC_STR as the
entry point.
--c-opr-lib-with-param
Run c opr lib with param, use to benchmark speed and check result, need c opr loader implemente
`copr_param_device_ptr_malloc, copr_param_device_ptr_free and copr_param_device_ptr_h2d symbols`.
--thread <num>
Number of threads to run concurrently. All threads perform the same work of
loading and executing models. This is used for test thread safety, not for
...
...
@@ -486,6 +491,20 @@ struct Args {
int
args_parse_ret
=
0
;
std
::
string
model_path
;
struct
COprArgs
{
//! for run c opr
bool
is_run_c_opr
=
false
;
bool
is_run_c_opr_with_param
=
false
;
typedef
void
(
*
COPR_PARAM_DEVICE_PTR_MEM_T
)(
ExternCOprParam
*
param
);
typedef
void
(
*
COPR_PARAM_DEVICE_PTR_H2D_T
)(
ExternCOprParam
*
param
,
void
*
host_ptr
,
size_t
extern_device_tensor_id
);
COPR_PARAM_DEVICE_PTR_MEM_T
copr_param_device_ptr_malloc
=
nullptr
;
COPR_PARAM_DEVICE_PTR_MEM_T
copr_param_device_ptr_free
=
nullptr
;
COPR_PARAM_DEVICE_PTR_H2D_T
copr_param_device_ptr_h2d
=
nullptr
;
};
COprArgs
c_opr_args
;
bool
disable_assert_throw
=
false
;
bool
share_param_mem
=
false
;
...
...
@@ -775,6 +794,65 @@ void run_test_st(Args &env) {
output_names
.
c_str
());
double
tot_time
=
0
;
for
(
uint32_t
i
=
0
;
i
<
nr_test
;
++
i
)
{
std
::
shared_ptr
<
ExternCOprParam
>
c_opr_param
;
//! map a C++ DType onto the matching MGBDType constant of the extern C
//! API; any dtype without a case below is reported through mgb_throw as
//! InternalError
auto dtype_cpp2c = [](DType dtype) -> MGBDType {
    const auto dtype_enum = dtype.enumv();
    switch (dtype_enum) {
        case DTypeEnum::Float32:
            return MGB_DTYPE_FLOAT32;
        case DTypeEnum::Int32:
            return MGB_DTYPE_INT32;
        case DTypeEnum::Int16:
            return MGB_DTYPE_INT16;
        case DTypeEnum::Uint8:
            return MGB_DTYPE_UINT8;
#if !MEGDNN_DISABLE_FLOAT16
        //! Float16 is only mapped when float16 support is compiled in
        case DTypeEnum::Float16:
            return MGB_DTYPE_FLOAT16;
#endif
        default:
            mgb_throw(InternalError,
                      "unsupported dtype for extern C API: %s",
                      dtype.name());
    }
};
//! copy ndim and every dimension of \p shape into \p mgb_shape, after
//! asserting that ndim does not exceed MGB_TENSOR_MAX_NDIM
auto tensor_shape_to_c = [](const TensorShape& shape,
                            MGBTensorShape& mgb_shape) {
    mgb_assert(shape.ndim <= MGB_TENSOR_MAX_NDIM,
               "shape ndim too large: %zu", shape.ndim);
    mgb_shape.ndim = shape.ndim;
    size_t axis = 0;
    while (axis < shape.ndim) {
        mgb_shape.shape[axis] = shape[axis];
        ++axis;
    }
};
if
(
env
.
c_opr_args
.
is_run_c_opr_with_param
)
{
c_opr_param
=
std
::
make_shared
<
ExternCOprParam
>
();
memset
(
c_opr_param
.
get
(),
0
,
sizeof
(
ExternCOprParam
));
//! only the input-on-npu case is tested, not the output-on-npu case,
//! so only the input shapes and dtypes are initialized here
c_opr_param
->
nr_input
=
inp_tensors
.
size
();
c_opr_param
->
input
=
(
ExternDeviceTensor
*
)
malloc
(
sizeof
(
ExternDeviceTensor
)
*
inp_tensors
.
size
());
memset
(
c_opr_param
->
input
,
0
,
sizeof
(
ExternDeviceTensor
)
*
inp_tensors
.
size
());
//! init input ExternDeviceTensor shape and dtype
for
(
size_t
input_index
=
0
;
input_index
<
inp_tensors
.
size
();
input_index
++
)
{
auto
&
mgb_tensor_layout
=
c_opr_param
->
input
[
input_index
].
layout
;
auto
host_tensor_nd_p
=
inp_tensors
[
input_index
].
second
;
mgb_tensor_layout
.
dtype
=
dtype_cpp2c
(
host_tensor_nd_p
->
dtype
());
tensor_shape_to_c
(
inp_tensors
[
input_index
].
second
->
shape
(),
mgb_tensor_layout
.
shape
);
}
c_opr_param
->
nr_output
=
0
;
//! now call copr_param_device_ptr_malloc to malloc
//! device_ptr
env
.
c_opr_args
.
copr_param_device_ptr_malloc
(
c_opr_param
.
get
());
}
loader
=
serialization
::
GraphLoader
::
make
(
loader
->
reset_file
(),
loader
->
format
());
auto
testcase
=
loader
->
load
(
env
.
load_config
,
false
);
...
...
@@ -782,8 +860,18 @@ void run_test_st(Args &env) {
for
(
size_t
i
=
0
;
i
<
inp_tensors
.
size
();
++
i
)
{
auto
&&
opr
=
testcase
.
output_var_list
[
i
].
node
()
->
owner_opr
()
->
cast_final_safe
<
opr
::
SharedDeviceTensor
>
();
inp_tensors
[
i
].
second
->
copy_from
(
HostTensorND
::
make_proxy
(
*
opr
.
dev_data
()));
if
(
env
.
c_opr_args
.
is_run_c_opr_with_param
)
{
//! now call copr_param_device_ptr_h2d to fill data
env
.
c_opr_args
.
copr_param_device_ptr_h2d
(
c_opr_param
.
get
(),
opr
.
dev_data
()
->
raw_ptr
(),
i
);
}
else
{
inp_tensors
[
i
].
second
->
copy_from
(
HostTensorND
::
make_proxy
(
*
opr
.
dev_data
()));
}
}
//! now config c opr dynamic param
if
(
env
.
c_opr_args
.
is_run_c_opr_with_param
)
{
config_extern_c_opr_dynamic_param
(
func
,
c_opr_param
);
}
if
(
!
i
)
{
...
...
@@ -796,10 +884,18 @@ void run_test_st(Args &env) {
continue
;
}
tot_time
+=
run_iters
(
i
);
//! now free c opr device_ptr
if
(
env
.
c_opr_args
.
is_run_c_opr_with_param
)
{
env
.
c_opr_args
.
copr_param_device_ptr_free
(
c_opr_param
.
get
());
free
(
c_opr_param
->
input
);
}
}
printf
(
"=== total time: %.3fms
\n
"
,
tot_time
);
}
else
if
(
not
env
.
data_files
.
empty
())
{
mgb_assert
(
!
env
.
c_opr_args
.
is_run_c_opr_with_param
,
"run c opr with param only support dump_with_testcase!!"
);
auto
&
tensormap
=
env
.
load_ret
.
tensor_map
;
DataParser
parser
;
...
...
@@ -824,6 +920,8 @@ void run_test_st(Args &env) {
printf
(
"=== going to run input for %d times
\n
"
,
env
.
nr_run
);
run_iters
(
0
);
}
else
{
mgb_assert
(
!
env
.
c_opr_args
.
is_run_c_opr_with_param
,
"run c opr with param only support dump_with_testcase!!"
);
// run speed test for a raw mgb graph
mgb_assert
(
env
.
load_ret
.
tensor_map
.
empty
(),
"model should not require input values; input vars should be "
...
...
@@ -879,6 +977,16 @@ int mgb_load_and_run_main(int argc, char** argv) {
}
auto
env
=
Args
::
from_argv
(
argc
,
argv
);
if
(
env
.
c_opr_args
.
is_run_c_opr_with_param
)
mgb_assert
(
env
.
c_opr_args
.
is_run_c_opr
&&
env
.
c_opr_args
.
copr_param_device_ptr_malloc
&&
env
.
c_opr_args
.
copr_param_device_ptr_free
&&
env
.
c_opr_args
.
copr_param_device_ptr_h2d
,
"--c-opr-lib-with-param need config with --c-opr-lib, also "
"extern c opr loader need implemente "
"copr_param_device_ptr_malloc, copr_param_device_ptr_free "
"and copr_param_device_ptr_h2d symbols"
);
if
(
env
.
args_parse_ret
!=
0
)
{
return
env
.
args_parse_ret
;
}
...
...
@@ -1219,6 +1327,7 @@ Args Args::from_argv(int argc, char **argv) {
}
if
(
!
strcmp
(
argv
[
i
],
"--c-opr-lib"
))
{
++
i
;
ret
.
c_opr_args
.
is_run_c_opr
=
true
;
mgb_assert
(
i
<
argc
,
"value not given for --c-opr-lib"
);
auto
handle
=
dlopen
(
argv
[
i
],
RTLD_LAZY
);
mgb_assert
(
handle
,
"failed to open c opr lib %s: %s"
,
...
...
@@ -1231,6 +1340,36 @@ Args Args::from_argv(int argc, char **argv) {
reinterpret_cast
<
void
*>
(
&
mgb_get_extern_c_opr_api_versioned
));
printf
(
"loaded C opr library: %s
\n
"
,
argv
[
i
]);
entry
=
"copr_param_device_ptr_malloc"
;
func
=
dlsym
(
handle
,
entry
);
if
(
func
)
{
printf
(
"get %s from: %s
\n
"
,
entry
,
argv
[
i
]);
ret
.
c_opr_args
.
copr_param_device_ptr_malloc
=
reinterpret_cast
<
COprArgs
::
COPR_PARAM_DEVICE_PTR_MEM_T
>
(
func
);
}
entry
=
"copr_param_device_ptr_free"
;
func
=
dlsym
(
handle
,
entry
);
if
(
func
)
{
printf
(
"get %s from: %s
\n
"
,
entry
,
argv
[
i
]);
ret
.
c_opr_args
.
copr_param_device_ptr_free
=
reinterpret_cast
<
COprArgs
::
COPR_PARAM_DEVICE_PTR_MEM_T
>
(
func
);
}
entry
=
"copr_param_device_ptr_h2d"
;
func
=
dlsym
(
handle
,
entry
);
if
(
func
)
{
printf
(
"get %s from: %s
\n
"
,
entry
,
argv
[
i
]);
ret
.
c_opr_args
.
copr_param_device_ptr_h2d
=
reinterpret_cast
<
COprArgs
::
COPR_PARAM_DEVICE_PTR_H2D_T
>
(
func
);
}
continue
;
}
if
(
!
strcmp
(
argv
[
i
],
"--c-opr-lib-with-param"
))
{
ret
.
c_opr_args
.
is_run_c_opr_with_param
=
true
;
continue
;
}
#endif
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录