Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
4adba378
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
404
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
4adba378
编写于
3月 03, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(lite): add example script and some small change for lar
GitOrigin-RevId: a28ed2f27a8d9fe8fb1ed2960477aeffd35f1857
上级
87f00232
变更
15
显示空白变更内容
内联
并排
Showing
15 changed file
with
1223 addition
and
32 deletion
+1223
-32
lite/load_and_run/examples/README.md
lite/load_and_run/examples/README.md
+15
-0
lite/load_and_run/examples/example.sh
lite/load_and_run/examples/example.sh
+437
-0
lite/load_and_run/examples/script/add_demo.py
lite/load_and_run/examples/script/add_demo.py
+57
-0
lite/load_and_run/examples/script/conv_demo.py
lite/load_and_run/examples/script/conv_demo.py
+42
-0
lite/load_and_run/examples/script/dump_trt.py
lite/load_and_run/examples/script/dump_trt.py
+37
-0
lite/load_and_run/examples/script/gen_trt_model.sh
lite/load_and_run/examples/script/gen_trt_model.sh
+33
-0
lite/load_and_run/examples/script/mge_input_data.py
lite/load_and_run/examples/script/mge_input_data.py
+33
-0
lite/load_and_run/examples/script/resnet50_mgb.py
lite/load_and_run/examples/script/resnet50_mgb.py
+153
-0
lite/load_and_run/examples/script/resnet50_mge.py
lite/load_and_run/examples/script/resnet50_mge.py
+351
-0
lite/load_and_run/src/options/device_options.cpp
lite/load_and_run/src/options/device_options.cpp
+3
-1
lite/load_and_run/src/options/extern_c_opr_options.cpp
lite/load_and_run/src/options/extern_c_opr_options.cpp
+3
-1
lite/load_and_run/src/options/io_options.cpp
lite/load_and_run/src/options/io_options.cpp
+7
-2
lite/load_and_run/src/options/optimize_options.cpp
lite/load_and_run/src/options/optimize_options.cpp
+0
-17
lite/load_and_run/src/options/plugin_options.cpp
lite/load_and_run/src/options/plugin_options.cpp
+47
-7
lite/load_and_run/src/strategys/strategy_normal.cpp
lite/load_and_run/src/strategys/strategy_normal.cpp
+5
-4
未找到文件。
lite/load_and_run/examples/README.md
0 → 100644
浏览文件 @
4adba378
# load and run example scripts
this is some typical examples for using of load and run, you can use
```
bash
./example.sh
```
to run those examples, more details is in the script
the usage of script can be see by run this scirpt with:
```
bash
./example.sh
-h
```
lite/load_and_run/examples/example.sh
0 → 100755
浏览文件 @
4adba378
#!/bin/bash
OLD_IFS
=
"
$IFS
"
IFS
=
$'
\n
'
TYPICAL_MODEL_DATD
=
"model_source/resnet50_b1_float32_without_data.mge --input
\"
data:input_data/resnet50_input.npy
\"
"
DEVICE_DESC
=
""
WORK_DIR_PATH
=
"."
RUN_ARM_DEVICE
=
"false"
RUN_TARGET
=
"diff_model"
ONLY_PREPARE_MODEL
=
"false"
MODEL_PREAPRED
=
"false"
ONLY_BUILD
=
"false"
LAR_BUILT
=
"false"
CLEAN_ALL
=
"false"
RUN_TARGETS
=(
"diff_model"
)
RUN_TARGETS+
=(
"diff_device"
)
RUN_TARGETS+
=(
"fast_run"
)
RUN_TARGETS+
=(
"io"
)
RUN_TARGETS+
=(
"layout"
)
RUN_TARGETS+
=(
"optimize"
)
RUN_TARGETS+
=(
"plugin"
)
RUN_TARGETS+
=(
"all"
)
function
usage
()
{
echo
"
$0
args1 args2 .."
echo
"available args detail:"
echo
"-p : prepare example model "
echo
"-b : build load_and_run for x86/armv7/arm64 cpu and CUDA"
echo
"-t : set the ssh arm device "
echo
"-w : set the arm device workspace dir"
echo
"-c : clean all"
echo
"-a : run all test"
echo
"-e : set the running target for test (details use
\"
-e
\"
to see)"
echo
"-h : show usage"
exit
-1
}
while
getopts
"pbcahe:w:t:"
arg
do
case
$arg
in
t
)
DEVICE_DESC
=
$OPTARG
RUN_ARM_DEVICE
=
"true"
echo
"config arm device DEVICE_DESC to
${
DEVICE_DESC
}
"
;;
w
)
WORK_DIR_PATH
=
$OPTARG
echo
"config arm device WORK_DIR_PATH to
${
WORK_DIR_PATH
}
"
;;
e
)
tmp_target
=
null
for
target
in
${
RUN_TARGETS
[@]
}
;
do
if
[
"
$target
"
=
"
$OPTARG
"
]
;
then
echo
"CONFIG BUILD RUN_TARGET to :
$OPTARG
"
tmp_target
=
$OPTARG
RUN_TARGET
=
$OPTARG
break
fi
done
if
[
"
$tmp_target
"
=
"null"
]
;
then
echo
"ERR args for target (-e)"
echo
"available target usage :"
for
target
in
${
RUN_TARGETS
[@]
}
;
do
echo
" -e
$target
"
done
exit
-1
fi
;;
h
)
echo
"show usage"
usage
;;
a
)
echo
"config RUN_TARGET=all"
RUN_TARGET
=
"all"
;;
c
)
echo
"clean all directory generated by script"
CLEAN_ALL
=
"true"
;;
b
)
echo
"run build"
ONLY_BUILD
=
"true"
;;
p
)
echo
"prepare model and input"
ONLY_PREPARE_MODEL
=
"true"
;;
?
)
echo
"unkonw argument"
usage
;;
esac
done
function
prepare_model_and_data
(){
rm
-rf
model_source
&&
mkdir
model_source
# dump mgb model
python3 script/resnet50_mgb.py
-o
model_source/resnet50.pkl
../dump_with_testcase.py model_source/resnet50.pkl
-o
model_source/resnet50_with_data.mgb
-d
"#rand(0, 255)"
--no-assert
# prepare simple add model
python3 script/add_demo.py
--dir
model_source
python3 script/conv_demo.py
--dir
model_source
#generate trt model
script/gen_trt_model.sh
#prepare mge model
python3 script/resnet50_mge.py
--dir
model_source
python3 script/resnet50_mge.py
--dir
model_source
-d
int8
python3 script/resnet50_mge.py
--dir
model_source
--inputs
"#rand(0,255)"
#make input_data
rm
-rf
input_data
&&
mkdir
input_data
python3 script/mge_input_data.py
rm
-rf
tmpdir
&&
mkdir
tmpdir
}
function
build_lar
(){
# build cpu and cuda version
../../../scripts/cmake-build/host_build.sh
-r
-t
-e
load_and_run
#WARNING:config the cuda environment before compile
../../../scripts/cmake-build/host_build.sh
-c
-t
-e
load_and_run
# # build arm version
../../../scripts/cmake-build/cross_build_android_arm_inference.sh
-r
-a
arm64-v8a
-e
load_and_run
../../../scripts/cmake-build/cross_build_android_arm_inference.sh
-r
-a
armeabi-v7a
-e
load_and_run
# link or for test
ln
-s
../../../build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release//build/lite/load_and_run/load_and_run lar_cpu
ln
-s
../../../build_dir/host/MGE_WITH_CUDA_ON/MGE_INFERENCE_ONLY_OFF/Release//build/lite/load_and_run/load_and_run lar_cuda
cp
../../../build_dir/android/arm64-v8a/Release/build/lite/load_and_run/load_and_run ./lar_arm64
cp
../../../build_dir/android/armeabi-v7a/Release/build/lite/load_and_run/load_and_run ./lar_armv7
}
function
set_arm_device_and_upload
(){
DEVICE_DESC
=
"
${
1
}
"
WORK_DIR_PATH
=
"
${
2
}
"
RUN_ARM_DEVICE
=
"
${
3
}
"
cmd
=
"rsync -aP -zz ./lar_arm64 ./lar_armv7 model_source/resnet50_b1_float32_without_data.mge input_data/resnet50_input.npy
$DEVICE_DESC
:
$WORK_DIR_PATH
/"
echo
$cmd
bash
-c
"
$cmd
"
}
function
test_different_model
(){
CmdArray
=(
"./lar_cpu model_source/resnet50_with_data.mgb"
)
CmdArray+
=(
"./lar_cpu model_source/resnet50_b1_float32_with_data.mge"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--lite"
)
for
cmd
in
${
CmdArray
[@]
}
;
do
echo
"
$cmd
"
bash
-c
"
$cmd
"
done
}
function
test_different_device
(){
#dispatch时,计算任务会加入一个工作队列,由队列统一管理执行 均值 131.278 ms 标准差 15.197ms m_asyc_exec异步执行
CmdArray
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--cpu"
)
#dispatch时,计算任务直接执行 均值 131.875 ms 标准差 7.758ms m_asyc_exec同步执行
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--cpu-default"
)
#多线程运行 1~8平均运行时间(ms):129.611, 84.266, 76.963, 55.212, 69.283, 77.338, 58.386, 64.585
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--multithread 4"
)
#主线程锁核,其他任务在线程池中的线程上运行 132.614, 83.095, 69.792, 54.452, 48.890, 48.206, 46.386, 53.908
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--multithread-default 4"
)
#cpu多线程绑核(x86上绑核影响不大)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--multithread 2 --multi-thread-core-ids 1,5"
)
#xpu 设置为cpu上运行 132.740 ms comp_node:cpu
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--cpu"
)
#xpu 设置为cuda上运行 6.495 ms comp_node:gpu
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--cuda"
)
for
cmd
in
${
CmdArray
[@]
}
;
do
echo
$cmd
bash
-c
"
$cmd
"
done
function
test_fast_run
(){
CmdArray
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--fast-run"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--full-run"
)
#fast run 搜参
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--fast-run --fast-run-algo-policy tmpdir/algo_cache_file"
)
#fast run 带参执行
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--fast-run-algo-policy tmpdir/algo_cache_file"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--fast-run --reproducible"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--fast-run --fast-run-shared-batch-size 1"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--fast-run --binary-equal-between-batch"
)
for
cmd
in
${
CmdArray
[@]
}
;
do
echo
$cmd
bash
-c
"
$cmd
"
done
}
function
test_io
(){
rm
-rf
tmpdir/bin_io_info tmpdir/bin_out_info tmpdir/bin_out_info_cuda tmpdir/io_info.txt
mkdir
tmpdir/bin_io_info tmpdir/bin_out_info tmpdir/bin_io_info_cuda
CmdArray
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
input_data/add_demo_input.json
\"
"
)
#! the model must support input with nhwc shape
CmdArray+
=(
"./lar_cpu model_source/resnet50_b1_int8_without_data.mge --input
\"
data:input_data/cat.ppm
\"
"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--io-dump tmpdir/io_info.txt --iter 1 --warmup-iter 0"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--io-dump-stdout --iter 1 --warmup-iter 0"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--io-dump-stderr --iter 1 --warmup-iter 0"
)
#different data in the given directory the name is the var id which is the same with txt-dump information
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--bin-io-dump tmpdir/bin_io_info --iter 1 --warmup-iter 0"
)
CmdArray+
=(
"./lar_cuda model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--bin-io-dump tmpdir/bin_io_info_cuda --iter 1 --warmup-iter 0"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--bin-out-dump tmpdir/bin_out_info --iter 1 --warmup-iter 0"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--copy-to-host"
)
for
cmd
in
${
CmdArray
[@]
}
;
do
echo
$cmd
bash
-c
"
$cmd
"
done
#compare the binary io information
python3 ../../../imperative/python/megengine/tools/compare_binary_iodump.py tmpdir/bin_io_info tmpdir/bin_io_info_cuda
}
function
test_layout_related
(){
# very little speed up
CmdArray
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--enable-nchw4"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--enable-chwn4"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--enable-nchw32"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--enable-nchw64"
)
#speed up
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--enable-nchw88"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--cuda --layout-transform cuda"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--cuda --layout-transform cuda --layout-transform-dump model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cuda.mge"
)
CmdArray+
=(
"./lar_cuda model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cuda.mge"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--cpu --layout-transform cpu"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--cpu --layout-transform cpu --layout-transform-dump model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cpu.mge"
)
CmdArray+
=(
"./lar_cpu model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cpu.mge"
)
for
cmd
in
${
CmdArray
[@]
}
;
do
echo
$cmd
bash
-c
"
$cmd
"
done
if
[
${
RUN_ARM_DEVICE
}
==
"true"
]
;
then
#speed up
CmdArray
=(
"./lar_arm64 resnet50_b1_float32_without_data.mge --input
\"
data:resnet50_input.npy
\"
--cpu --enable-nchw44"
)
#speed up
CmdArray+
=(
"./lar_arm64 resnet50_b1_float32_without_data.mge --input
\"
data:resnet50_input.npy
\"
--cpu --enable-nchw44-dot"
)
for
cmd
in
${
CmdArray
[@]
}
;
do
echo
$cmd
ssh
-t
$DEVICE_DESC
"unset LD_PRELOAD && cd
$WORK_DIR_PATH
&& LD_LIBRARY_PATH=./
$cmd
"
done
else
echo
"SET arm device ON :
$RUN_ARM_DEVICE
"
fi
}
function
test_optimize
(){
CmdArray
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--enable-fuse-preprocess"
)
#warm up speed up
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--enable-fuse-conv-bias-nonlinearity"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--enable-fuse-conv-bias-with-z"
)
CmdArray+
=(
"./lar_cuda model_source/trt_conv_demo_with_data.mgb --tensorrt"
)
CmdArray+
=(
"./lar_cuda model_source/trt_conv_demo_with_data.mgb --tensorrt --tensorrt-cache tmpdir/TRT_cache"
)
CmdArray+
=(
"./lar_cuda model_source/trt_conv_demo_with_data.mgb --tensorrt-cache tmpdir/TRT_cache"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--no-sanity-check --record-comp-seq2"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--disable_mem_opt"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--workspace_limit 10000"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--fake-first"
)
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--enable_jit "
)
for
cmd
in
${
CmdArray
[@]
}
;
do
echo
$cmd
bash
-c
"
$cmd
"
done
}
function
test_plugin
(){
rm
-rf
tmpdir/staticMemInfoDir tmpdir/staticMemInfoDirLogs
mkdir
tmpdir/staticMemInfoDir
CmdArray
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--check-dispatch"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--check-var-value 5:0"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--range 2"
)
CmdArray+
=(
"./lar_cpu model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--profile tmpdir/opr_profile.json
"
)
CmdArray+
=(
"./lar_cuda model_source/add_demo_f32_without_data.mge --input
\"
data:[2,3,4]
\"
--profile-host tmpdir/opr_profile_host.json"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--model-info"
)
CmdArray+
=(
"./lar_cpu
$TYPICAL_MODEL_DATD
--verbose"
)
CmdArray+
=(
"./lar_cpu model_source/resnet50_with_data.mgb --disable-assert-throw"
)
# wait gdb attach to given PID
# CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --wait-gdb")
CmdArray+
=(
"./lar_cuda
$TYPICAL_MODEL_DATD
--get-static-mem-info tmpdir/staticMemInfoDir"
)
for
cmd
in
${
CmdArray
[@]
}
;
do
echo
$cmd
bash
-c
"
$cmd
"
done
# view the graph with given url (usally: http://localhost:6006/)
# mkdir tmpdir/staticMemInfoDirLogs && python3 ../../../imperative/python/megengine/tools/graph_info_analyze.py -i tmpdir/staticMemInfoDir -o tmpdir/staticMemInfoDirLogs
# pip3 install tensorboard && tensorboard --logdir tmpdir/staticMemInfoDirLogs
}
function
clean
(){
rm
-rf
tmpdir model_source input_data lar_cpu lar_cuda lar_arm64 lar_armv7
}
function
main
(){
if
[
${
CLEAN_ALL
}
==
"true"
]
;
then
clean
exit
0
fi
if
[
${
ONLY_PREPARE_MODEL
}
==
"true"
]
;
then
prepare_model_and_data
MODEL_PREAPRED
=
"true"
exit
0
fi
if
[
${
ONLY_BUILD
}
==
"true"
]
;
then
build_lar
LAR_BUILT
=
"true"
exit
0
fi
if
[
${
RUN_ARM_DEVICE
}
==
"true"
]
;
then
set_arm_device_and_upload
$DEVICE_DESC
"
$WORK_DIR_PATH
"
"true"
fi
if
[
${
MODEL_PREAPRED
}
!=
"true"
]
;
then
CHECK_MODEL
=
$(
find
.
-name
add_demo_input.json
)
if
[
${
CHECK_MODEL
}
==
""
]
;
then
prepare_model_and_data
MODEL_PREAPRED
=
"true"
fi
fi
if
[
${
LAR_BUILT
}
!=
"true"
]
;
then
CHECK_LAR
=
$(
find
.
-name
lar_armv7
)
if
[
${
CHECK_LAR
}
==
""
]
;
then
build_lar
LAR_BUILT
=
"true"
fi
fi
if
[
${
RUN_TARGET
}
==
"diff_model"
-o
${
RUN_TARGET
}
==
"all"
]
;
then
test_different_model
fi
if
[
${
RUN_TARGET
}
==
"diff_device"
-o
${
RUN_TARGET
}
==
"all"
]
;
then
test_different_device
fi
if
[
${
RUN_TARGET
}
==
"fast_run"
-o
${
RUN_TARGET
}
==
"all"
]
;
then
test_fast_run
fi
if
[
${
RUN_TARGET
}
==
"io"
-o
${
RUN_TARGET
}
==
"all"
]
;
then
test_io
fi
if
[
${
RUN_TARGET
}
==
"layout"
-o
${
RUN_TARGET
}
==
"all"
]
;
then
test_layout_related
fi
if
[
${
RUN_TARGET
}
==
"optimize"
-o
${
RUN_TARGET
}
==
"all"
]
;
then
test_optimize
fi
if
[
${
RUN_TARGET
}
==
"plugin"
-o
${
RUN_TARGET
}
==
"all"
]
;
then
test_plugin
fi
}
main
IFS
=
$OLD_IFS
\ No newline at end of file
lite/load_and_run/examples/script/add_demo.py
0 → 100755
浏览文件 @
4adba378
#!/usr/bin/env python3
import
argparse
import
math
import
megengine.functional
as
F
import
megengine.hub
as
hub
import
megengine.module
as
M
import
numpy
as
np
from
megengine
import
jit
,
tensor
,
Parameter
class
Simple
(
M
.
Module
):
def
__init__
(
self
):
super
().
__init__
()
self
.
a
=
Parameter
([
0
,
1
,
2
],
dtype
=
np
.
float32
)
def
forward
(
self
,
x
):
x
=
x
+
self
.
a
return
x
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
description
=
"dump mge model for add_demo"
,
formatter_class
=
argparse
.
ArgumentDefaultsHelpFormatter
,
)
parser
.
add_argument
(
"--inputs"
,
help
=
"set the inputs data to get a model with testcase"
,
default
=
""
,
type
=
str
,
)
parser
.
add_argument
(
"--dir"
,
help
=
"set the dir where the model to dump"
,
default
=
"."
,
type
=
str
,
)
args
=
parser
.
parse_args
()
net
=
Simple
()
net
.
eval
()
@
jit
.
trace
(
symbolic
=
True
,
capture_as_const
=
True
)
def
fun
(
data
):
return
net
(
data
)
data
=
tensor
([
3
,
4
,
5
])
fun
(
data
)
if
args
.
inputs
==
""
:
fun
.
dump
(
args
.
dir
+
"/add_demo_f32_without_data.mge"
,
arg_names
=
[
"data"
],
no_assert
=
True
,
)
else
:
fun
.
dump
(
args
.
dir
+
"/add_demo_f32_with_data.mge"
,
arg_names
=
[
"data"
],
input_data
=
[
args
.
inputs
],
no_assert
=
True
,
)
\ No newline at end of file
lite/load_and_run/examples/script/conv_demo.py
0 → 100755
浏览文件 @
4adba378
#!/usr/bin/env python3
import
argparse
import
math
import
megengine.functional
as
F
import
megengine.module
as
M
import
numpy
as
np
from
megengine
import
jit
,
tensor
class
ConvNet
(
M
.
Module
):
def
__init__
(
self
):
super
().
__init__
()
self
.
conv1
=
M
.
Conv2d
(
in_channels
=
3
,
out_channels
=
1
,
kernel_size
=
3
,
bias
=
False
)
def
forward
(
self
,
input
):
x
=
self
.
conv1
(
input
)
return
x
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
description
=
"dump mge model for add_demo"
,
formatter_class
=
argparse
.
ArgumentDefaultsHelpFormatter
,
)
parser
.
add_argument
(
"--dir"
,
help
=
"set the dir where the model to dump"
,
default
=
"."
,
type
=
str
,
)
args
=
parser
.
parse_args
()
net
=
ConvNet
()
net
.
eval
()
@
jit
.
trace
(
symbolic
=
True
,
capture_as_const
=
True
)
def
fun
(
data
):
return
net
(
data
)
inp
=
tensor
(
np
.
arange
(
0
,
96
).
astype
(
"float32"
).
reshape
(
2
,
3
,
4
,
4
))
out
=
fun
(
inp
)
fun
.
dump
(
args
.
dir
+
"/conv_demo_f32_without_data.mge"
,
arg_names
=
[
"data"
],
no_assert
=
True
)
\ No newline at end of file
lite/load_and_run/examples/script/dump_trt.py
0 → 100644
浏览文件 @
4adba378
#!/usr/bin/env python3
from
megskull.network
import
RawNetworkBuilder
import
megskull.opr.all
as
O
from
megskull.opr.external
import
TensorRTRuntimeOpr
from
meghair.utils.io
import
dump
import
argparse
def
str2tuple
(
x
):
x
=
x
.
split
(
','
)
x
=
[
int
(
a
)
for
a
in
x
]
x
=
tuple
(
x
)
return
x
def
make_network
(
model
,
isize
):
data
=
[
O
.
DataProvider
(
'input{}'
.
format
(
i
),
shape
=
isizes
[
i
])
for
i
in
range
(
len
(
isizes
))]
f
=
open
(
model
,
'rb'
)
engine
=
f
.
read
()
opr
=
TensorRTRuntimeOpr
(
data
,
engine
,
1
)
net
=
RawNetworkBuilder
(
inputs
=
[
data
],
outputs
=
opr
.
outputs
)
return
net
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
dest
=
'model'
)
parser
.
add_argument
(
dest
=
'output'
)
parser
.
add_argument
(
'--isize'
,
help
=
'input sizes. '
'e.g. for models with two (1,3,224,224) inputs, '
'the option --isize="1,3,224,224;1,3,224,224" should be used'
)
args
=
parser
.
parse_args
()
isizes
=
[
str2tuple
(
x
)
for
x
in
args
.
isize
.
split
(
';'
)]
net
=
make_network
(
args
.
model
,
isizes
)
dump
(
net
,
args
.
output
)
\ No newline at end of file
lite/load_and_run/examples/script/gen_trt_model.sh
0 → 100755
浏览文件 @
4adba378
#!/bin/bash
CUR_DIR
=
"
$(
cd
"
$(
dirname
$0
)
"
>
/dev/null 2>&1
;
pwd
-P
)
"
# find correct trtexec version, only works for internal ci and brainpp env setups
CUDA_VERSION
=
$(
nvcc
--version
|
grep
-o
"[0-9].
\.
[0-9]*"
|
head
-n
1
)
SEARCH_PATH
=
$(
echo
`
which nvcc | xargs
dirname
`
/../../
)
TRT_CANDIDATE
=
$(
find
`
cd
$SEARCH_PATH
;
pwd
`
-name
"trtexec"
|
grep
"bin/trtexec"
|
grep
$CUDA_VERSION
)
TRT_CANDIDATE
=
${
TRT_CANDIDATE
%
$'
\n
'
*
}
TRT_LIB_PATH
=
$(
readlink
-f
"
`
dirname
$TRT_CANDIDATE
`
/../lib"
)
MODELS_PATH
=
$(
readlink
-f
"
${
CUR_DIR
}
/../model_source"
)
# generate mge model
rm
-rf
$MODELS_PATH
/conv_demo_f32_without_data.mge
python3
${
CUR_DIR
}
/conv_demo.py
--dir
$MODELS_PATH
# generate caffe model with mge convert
# INSTALL mgeconvert:
# python3 -m pip install git+https://github.com/MegEngine/mgeconvert.git --user --install-option="--targets=caffe"
rm
-rf
$MODELS_PATH
/conv_demo.prototxt
$MODELS_PATH
/conv_demo.caffemodel
convert mge_to_caffe
-i
$MODELS_PATH
/conv_demo_f32_without_data.mge
-c
$MODELS_PATH
/conv_demo.prototxt
-b
$MODELS_PATH
/conv_demo.caffemodel
# generate trt model
rm
-rf
$MODELS_PATH
/conv_demo.trt
echo
"WARNING: config cudnn and cublas path before run trtexec"
export
LD_LIBRARY_PATH
=
$LD_LIBRARY_PATH
:
$TRT_LIB_PATH
echo
$LD_LIBRARY_PATH
$TRT_CANDIDATE
--deploy
=
"
$MODELS_PATH
/conv_demo.prototxt"
--model
=
"
$MODELS_PATH
/conv_demo.caffemodel"
--output
=
"conv1_Convolution"
--batch
=
1
--saveEngine
=
"
$MODELS_PATH
/conv_demo.trt"
# redump trt model into mgb model
rm
-rf
$MODELS_PATH
/trt_conv_demo.pkl
$MODELS_PATH
/trt_conv_demo_with_data.mgb
python3
$CUR_DIR
/dump_trt.py
$MODELS_PATH
/conv_demo.trt
$MODELS_PATH
/trt_conv_demo.pkl
--isize
=
"1,3,4,4"
$CUR_DIR
/../../dump_with_testcase.py
$MODELS_PATH
/trt_conv_demo.pkl
-o
$MODELS_PATH
/trt_conv_demo_with_data.mgb
-d
"#rand(0, 255)"
--no-assert
\ No newline at end of file
lite/load_and_run/examples/script/mge_input_data.py
0 → 100755
浏览文件 @
4adba378
#!/usr/bin/env python3
import
numpy
as
np
import
cv2
import
megengine.data.transform
as
T
import
megengine.functional
as
F
import
json
import
urllib
url
,
filename
=
(
"https://data.megengine.org.cn/images/cat.jpg"
,
"input_data/cat.jpg"
)
try
:
urllib
.
URLopener
().
retrieve
(
url
,
filename
)
except
:
urllib
.
request
.
urlretrieve
(
url
,
filename
)
# numpy data
data
=
np
.
random
.
rand
(
1
,
3
,
224
,
224
)
np
.
save
(
"input_data/resnet50_input_uint8.npy"
,
data
.
astype
(
np
.
uint8
))
np
.
save
(
"input_data/resnet50_input.npy"
,
data
.
astype
(
np
.
float32
))
#ppm data
image
=
cv2
.
imread
(
"input_data/cat.jpg"
)
transform
=
T
.
Compose
([
T
.
Resize
(
256
),
T
.
CenterCrop
(
224
),
])
processed_img
=
transform
.
apply
(
image
)
cv2
.
imwrite
(
"input_data/cat.ppm"
,
processed_img
)
#json
data_obj
=
{
"shape"
:
[
1
,
3
],
"type"
:
"int32"
,
"raw"
:
[
2
,
3
,
4
]
}
with
open
(
"input_data/add_demo_input.json"
,
"w"
)
as
f
:
json
.
dump
({
"data"
:
data_obj
},
f
)
\ No newline at end of file
lite/load_and_run/examples/script/resnet50_mgb.py
0 → 100755
浏览文件 @
4adba378
#!/usr/bin/env python3
import
megbrain
as
mgb
from
megskull.graph
import
FpropEnv
import
megskull
as
mgsk
from
megskull.opr.compatible.caffepool
import
CaffePooling2D
from
megskull.opr.arith
import
ReLU
from
megskull.opr.all
import
(
DataProvider
,
Conv2D
,
Pooling2D
,
FullyConnected
,
Softmax
,
Dropout
,
BatchNormalization
,
CrossEntropyLoss
,
ElementwiseAffine
,
WarpPerspective
,
WarpPerspectiveWeightProducer
,
WeightDecay
,
ParamProvider
,
ConvBiasActivation
,
ElemwiseMultiType
)
from
megskull.network
import
RawNetworkBuilder
from
megskull.utils.debug
import
CallbackInjector
import
megskull.opr.helper.param_init
as
pinit
from
megskull.opr.helper.elemwise_trans
import
Identity
from
megskull.opr.netsrc
import
DataProvider
from
megskull.opr.cnn
import
Conv2D
,
Pooling2D
,
FullyConnected
,
Softmax
,
Conv2DImplHelper
from
megskull.opr.loss
import
CrossEntropyLoss
from
megskull.opr.regularizer
import
Dropout
,
BatchNormalization
from
megskull.opr.arith
import
Add
,
ReLU
from
megskull.opr.netsrc
import
ConstProvider
from
megskull.network
import
RawNetworkBuilder
import
numpy
as
np
from
megskull.network
import
RawNetworkBuilder
,
NetworkVisitor
from
megskull.graph
import
iter_dep_opr
from
megskull.utils.misc
import
get_2dshape
import
functools
import
re
import
fnmatch
import
argparse
import
sys
def
create_bn_relu_float
(
conv_name
,
f_in
,
ksize
,
stride
,
pad
,
num_outputs
,
has_relu
,
args
):
f
=
Conv2D
(
conv_name
,
f_in
,
kernel_shape
=
ksize
,
stride
=
stride
,
padding
=
pad
,
output_nr_channel
=
num_outputs
,
nonlinearity
=
mgsk
.
opr
.
helper
.
elemwise_trans
.
Identity
())
if
has_relu
:
f
=
ReLU
(
f
)
return
f
def
get_num_inputs
(
feature
,
format
):
if
format
==
'NCHW'
:
return
feature
.
partial_shape
[
1
]
else
:
assert
format
==
'NCHW4'
return
feature
.
partial_shape
[
1
]
*
4
def
create_bn_relu
(
prefix
,
f_in
,
ksize
,
stride
,
pad
,
num_outputs
,
has_relu
,
conv_name_fun
,
args
):
if
conv_name_fun
:
conv_name
=
conv_name_fun
(
prefix
)
else
:
conv_name
=
prefix
return
create_bn_relu_float
(
conv_name
,
f_in
,
ksize
,
stride
,
pad
,
num_outputs
,
has_relu
,
args
)
def
create_bottleneck
(
prefix
,
f_in
,
stride
,
num_outputs1
,
num_outputs2
,
args
,
has_proj
=
False
):
proj
=
f_in
if
has_proj
:
proj
=
create_bn_relu
(
prefix
,
f_in
,
ksize
=
1
,
stride
=
stride
,
pad
=
0
,
num_outputs
=
num_outputs2
,
has_relu
=
False
,
conv_name_fun
=
lambda
p
:
"interstellar{}_branch1"
.
format
(
p
),
args
=
args
)
f
=
create_bn_relu
(
prefix
,
f_in
,
ksize
=
1
,
stride
=
1
,
pad
=
0
,
num_outputs
=
num_outputs1
,
has_relu
=
True
,
conv_name_fun
=
lambda
p
:
"interstellar{}_branch2a"
.
format
(
p
),
args
=
args
)
f
=
create_bn_relu
(
prefix
,
f
,
ksize
=
3
,
stride
=
stride
,
pad
=
1
,
num_outputs
=
num_outputs1
,
has_relu
=
True
,
conv_name_fun
=
lambda
p
:
"interstellar{}_branch2b"
.
format
(
p
),
args
=
args
)
f
=
create_bn_relu
(
prefix
,
f
,
ksize
=
1
,
stride
=
1
,
pad
=
0
,
num_outputs
=
num_outputs2
,
has_relu
=
False
,
conv_name_fun
=
lambda
p
:
"interstellar{}_branch2c"
.
format
(
p
),
args
=
args
)
f
=
ReLU
(
f
+
proj
)
return
f
def
get
(
args
):
img_size
=
224
num_inputs
=
3
data
=
DataProvider
(
'data'
,
shape
=
(
args
.
batch_size
,
num_inputs
,
img_size
,
img_size
))
inp
=
data
f
=
create_bn_relu
(
"conv1"
,
inp
,
ksize
=
7
,
stride
=
2
,
pad
=
3
,
num_outputs
=
64
,
has_relu
=
True
,
conv_name_fun
=
None
,
args
=
args
)
f
=
Pooling2D
(
"pool1"
,
f
,
window
=
3
,
stride
=
2
,
padding
=
1
,
mode
=
"MAX"
,
format
=
args
.
format
)
pre
=
[
2
,
3
,
4
,
5
]
stages
=
[
3
,
4
,
6
,
3
]
mid_outputs
=
[
64
,
128
,
256
,
512
]
enable_stride
=
[
False
,
True
,
True
,
True
]
for
p
,
s
,
o
,
es
in
zip
(
pre
,
stages
,
mid_outputs
,
enable_stride
):
for
i
in
range
(
s
):
has_proj
=
False
if
i
>
0
else
True
stride
=
1
if
not
es
or
i
>
0
else
2
prefix
=
"{}{}"
.
format
(
p
,
chr
(
ord
(
"a"
)
+
i
))
f
=
create_bottleneck
(
prefix
,
f
,
stride
,
o
,
o
*
4
,
args
,
has_proj
)
print
(
"{}
\t
{}"
.
format
(
prefix
,
f
.
partial_shape
))
f
=
Pooling2D
(
"pool5"
,
f
,
window
=
7
,
stride
=
7
,
padding
=
0
,
mode
=
"AVERAGE"
,
format
=
args
.
format
)
f
=
FullyConnected
(
"fc1000"
,
f
,
output_dim
=
1000
,
nonlinearity
=
mgsk
.
opr
.
helper
.
elemwise_trans
.
Identity
())
f
=
Softmax
(
"cls_softmax"
,
f
)
f
.
init_weights
()
net
=
RawNetworkBuilder
(
inputs
=
[
data
],
outputs
=
[
f
])
return
net
if
__name__
==
'__main__'
:
parser
=
argparse
.
ArgumentParser
(
description
=
'dump pkl model for resnet50'
,
formatter_class
=
argparse
.
ArgumentDefaultsHelpFormatter
)
parser
.
add_argument
(
'-b'
,
'--batch-size'
,
help
=
'batch size of the model'
,
default
=
1
)
parser
.
add_argument
(
'-f'
,
'--format'
,
choices
=
[
'NCHW'
,
'NCHW4'
],
help
=
'format of conv'
,
default
=
'NCHW'
)
parser
.
add_argument
(
'-o'
,
'--output'
,
help
=
'output pkl path'
,
required
=
True
)
args
=
parser
.
parse_args
()
if
args
.
format
!=
'NCHW'
:
print
(
'Only suppprt NCHW for float model'
)
parser
.
print_help
()
sys
.
exit
(
1
)
from
meghair.utils
import
io
io
.
dump
(
get
(
args
),
args
.
output
)
lite/load_and_run/examples/script/resnet50_mge.py
0 → 100755
浏览文件 @
4adba378
#!/usr/bin/env python3
import
argparse
import
math
import
megengine.functional
as
F
import
megengine.hub
as
hub
import
megengine.module
as
M
import
numpy
as
np
from
megengine
import
jit
,
tensor
class
BasicBlock
(
M
.
Module
):
expansion
=
1
def
__init__
(
self
,
in_channels
,
channels
,
stride
=
1
,
groups
=
1
,
base_width
=
64
,
dilation
=
1
,
norm
=
M
.
BatchNorm2d
,
):
super
().
__init__
()
if
groups
!=
1
or
base_width
!=
64
:
raise
ValueError
(
"BasicBlock only supports groups=1 and base_width=64"
)
if
dilation
>
1
:
raise
NotImplementedError
(
"Dilation > 1 not supported in BasicBlock"
)
self
.
conv1
=
M
.
Conv2d
(
in_channels
,
channels
,
3
,
stride
,
padding
=
dilation
,
bias
=
False
)
self
.
bn1
=
norm
(
channels
)
self
.
conv2
=
M
.
Conv2d
(
channels
,
channels
,
3
,
1
,
padding
=
1
,
bias
=
False
)
self
.
bn2
=
norm
(
channels
)
self
.
downsample
=
(
M
.
Identity
()
if
in_channels
==
channels
and
stride
==
1
else
M
.
Sequential
(
M
.
Conv2d
(
in_channels
,
channels
,
1
,
stride
,
bias
=
False
),
norm
(
channels
),
)
)
def
forward
(
self
,
x
):
identity
=
x
x
=
self
.
conv1
(
x
)
x
=
self
.
bn1
(
x
)
x
=
F
.
relu
(
x
)
x
=
self
.
conv2
(
x
)
x
=
self
.
bn2
(
x
)
identity
=
self
.
downsample
(
identity
)
x
+=
identity
x
=
F
.
relu
(
x
)
return
x
class
Bottleneck
(
M
.
Module
):
expansion
=
4
def
__init__
(
self
,
in_channels
,
channels
,
stride
=
1
,
groups
=
1
,
base_width
=
64
,
dilation
=
1
,
norm
=
M
.
BatchNorm2d
,
):
super
().
__init__
()
width
=
int
(
channels
*
(
base_width
/
64.0
))
*
groups
self
.
conv1
=
M
.
Conv2d
(
in_channels
,
width
,
1
,
1
,
bias
=
False
)
self
.
bn1
=
norm
(
width
)
self
.
conv2
=
M
.
Conv2d
(
width
,
width
,
3
,
stride
,
padding
=
dilation
,
groups
=
groups
,
dilation
=
dilation
,
bias
=
False
,
)
self
.
bn2
=
norm
(
width
)
self
.
conv3
=
M
.
Conv2d
(
width
,
channels
*
self
.
expansion
,
1
,
1
,
bias
=
False
)
self
.
bn3
=
norm
(
channels
*
self
.
expansion
)
self
.
downsample
=
(
M
.
Identity
()
if
in_channels
==
channels
*
self
.
expansion
and
stride
==
1
else
M
.
Sequential
(
M
.
Conv2d
(
in_channels
,
channels
*
self
.
expansion
,
1
,
stride
,
bias
=
False
),
norm
(
channels
*
self
.
expansion
),
)
)
def
forward
(
self
,
x
):
identity
=
x
x
=
self
.
conv1
(
x
)
x
=
self
.
bn1
(
x
)
x
=
F
.
relu
(
x
)
x
=
self
.
conv2
(
x
)
x
=
self
.
bn2
(
x
)
x
=
F
.
relu
(
x
)
x
=
self
.
conv3
(
x
)
x
=
self
.
bn3
(
x
)
identity
=
self
.
downsample
(
identity
)
x
+=
identity
x
=
F
.
relu
(
x
)
return
x
class
ResNet
(
M
.
Module
):
def
__init__
(
self
,
block
,
layers
,
num_classes
=
1000
,
zero_init_residual
=
False
,
groups
=
1
,
width_per_group
=
64
,
replace_stride_with_dilation
=
None
,
norm
=
M
.
BatchNorm2d
,
):
super
().
__init__
()
self
.
in_channels
=
64
self
.
dilation
=
1
if
replace_stride_with_dilation
is
None
:
# each element in the tuple indicates if we should replace
# the 2x2 stride with a dilated convolution instead
replace_stride_with_dilation
=
[
False
,
False
,
False
]
if
len
(
replace_stride_with_dilation
)
!=
3
:
raise
ValueError
(
"replace_stride_with_dilation should be None "
"or a 3-element tuple, got {}"
.
format
(
replace_stride_with_dilation
)
)
self
.
groups
=
groups
self
.
base_width
=
width_per_group
self
.
conv1
=
M
.
Conv2d
(
3
,
self
.
in_channels
,
kernel_size
=
7
,
stride
=
2
,
padding
=
3
,
bias
=
False
)
self
.
bn1
=
norm
(
self
.
in_channels
)
self
.
maxpool
=
M
.
MaxPool2d
(
kernel_size
=
3
,
stride
=
2
,
padding
=
0
)
self
.
layer1
=
self
.
_make_layer
(
block
,
64
,
layers
[
0
],
norm
=
norm
)
self
.
layer2
=
self
.
_make_layer
(
block
,
128
,
layers
[
1
],
stride
=
2
,
dilate
=
replace_stride_with_dilation
[
0
],
norm
=
norm
,
)
self
.
layer3
=
self
.
_make_layer
(
block
,
256
,
layers
[
2
],
stride
=
2
,
dilate
=
replace_stride_with_dilation
[
1
],
norm
=
norm
,
)
self
.
layer4
=
self
.
_make_layer
(
block
,
512
,
layers
[
3
],
stride
=
2
,
dilate
=
replace_stride_with_dilation
[
2
],
norm
=
norm
,
)
self
.
fc
=
M
.
Linear
(
512
*
block
.
expansion
,
num_classes
)
for
m
in
self
.
modules
():
if
isinstance
(
m
,
M
.
Conv2d
):
M
.
init
.
msra_normal_
(
m
.
weight
,
mode
=
"fan_out"
,
nonlinearity
=
"relu"
)
if
m
.
bias
is
not
None
:
fan_in
,
_
=
M
.
init
.
calculate_fan_in_and_fan_out
(
m
.
weight
)
bound
=
1
/
math
.
sqrt
(
fan_in
)
M
.
init
.
uniform_
(
m
.
bias
,
-
bound
,
bound
)
elif
isinstance
(
m
,
M
.
BatchNorm2d
):
M
.
init
.
ones_
(
m
.
weight
)
M
.
init
.
zeros_
(
m
.
bias
)
elif
isinstance
(
m
,
M
.
Linear
):
M
.
init
.
msra_uniform_
(
m
.
weight
,
a
=
math
.
sqrt
(
5
))
if
m
.
bias
is
not
None
:
fan_in
,
_
=
M
.
init
.
calculate_fan_in_and_fan_out
(
m
.
weight
)
bound
=
1
/
math
.
sqrt
(
fan_in
)
M
.
init
.
uniform_
(
m
.
bias
,
-
bound
,
bound
)
# Zero-initialize the last BN in each residual branch,
# so that the residual branch starts with zeros, and each residual block
# behaves like an identity. According to https://arxiv.org/abs/1706.02677
# This improves the model by 0.2~0.3%.
if
zero_init_residual
:
for
m
in
self
.
modules
():
if
isinstance
(
m
,
Bottleneck
):
M
.
init
.
zeros_
(
m
.
bn3
.
weight
)
elif
isinstance
(
m
,
BasicBlock
):
M
.
init
.
zeros_
(
m
.
bn2
.
weight
)
def
_make_layer
(
self
,
block
,
channels
,
blocks
,
stride
=
1
,
dilate
=
False
,
norm
=
M
.
BatchNorm2d
):
previous_dilation
=
self
.
dilation
if
dilate
:
self
.
dilation
*=
stride
stride
=
1
layers
=
[]
layers
.
append
(
block
(
self
.
in_channels
,
channels
,
stride
,
groups
=
self
.
groups
,
base_width
=
self
.
base_width
,
dilation
=
previous_dilation
,
norm
=
norm
,
)
)
self
.
in_channels
=
channels
*
block
.
expansion
for
_
in
range
(
1
,
blocks
):
layers
.
append
(
block
(
self
.
in_channels
,
channels
,
groups
=
self
.
groups
,
base_width
=
self
.
base_width
,
dilation
=
self
.
dilation
,
norm
=
norm
,
)
)
return
M
.
Sequential
(
*
layers
)
def
extract_features
(
self
,
x
):
outputs
=
{}
x
=
self
.
conv1
(
x
)
x
=
self
.
bn1
(
x
)
x
=
F
.
relu
(
x
)
x
=
self
.
maxpool
(
x
)
outputs
[
"stem"
]
=
x
x
=
self
.
layer1
(
x
)
outputs
[
"res2"
]
=
x
x
=
self
.
layer2
(
x
)
outputs
[
"res3"
]
=
x
x
=
self
.
layer3
(
x
)
outputs
[
"res4"
]
=
x
x
=
self
.
layer4
(
x
)
outputs
[
"res5"
]
=
x
return
outputs
def
forward
(
self
,
x
):
x
=
F
.
reshape
(
x
,
(
1
,
3
,
224
,
224
))
x
=
self
.
extract_features
(
x
)[
"res5"
]
x
=
F
.
avg_pool2d
(
x
,
7
)
x
=
F
.
flatten
(
x
,
1
)
x
=
self
.
fc
(
x
)
return
x
@
hub
.
pretrained
(
"https://data.megengine.org.cn/models/weights/resnet50_fbaug_76254_4e14b7d1.pkl"
)
def
resnet50
(
**
kwargs
):
r
"""ResNet-50 model from
`"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
"""
return
ResNet
(
Bottleneck
,
[
3
,
4
,
6
,
3
],
**
kwargs
)
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
description
=
"dump mge model for resnet50"
,
formatter_class
=
argparse
.
ArgumentDefaultsHelpFormatter
,
)
parser
.
add_argument
(
"-b"
,
"--batch-size"
,
help
=
"batch size of the model"
,
default
=
1
,
type
=
int
)
parser
.
add_argument
(
"-d"
,
"--dtype"
,
help
=
"the dtype of the model,which includes float32 and uint8"
,
default
=
"float32"
,
type
=
str
,
)
parser
.
add_argument
(
"--inputs"
,
help
=
"set the inputs data to get a model with testcase"
,
default
=
""
,
type
=
str
,
)
parser
.
add_argument
(
"--dir"
,
help
=
"set the dir where the model to dump"
,
default
=
"."
,
type
=
str
,
)
parser
.
add_argument
(
"--enable-nchw4"
,
help
=
"enable-nchw4 for NVIDIA CUDNN"
,
action
=
'store_true'
)
parser
.
add_argument
(
"--enable-chwn4"
,
help
=
"enable-chwn4 for NVIDIA CUDNN"
,
action
=
'store_true'
)
args
=
parser
.
parse_args
()
net
=
resnet50
()
net
.
eval
()
@
jit
.
trace
(
symbolic
=
True
,
capture_as_const
=
True
)
def
fun
(
data
):
return
net
(
data
)
if
args
.
dtype
==
"float32"
or
args
.
dtype
==
"uint8"
:
# dump float32
data_type
=
np
.
float32
if
args
.
dtype
==
"uint8"
:
data_type
=
np
.
uint8
data
=
tensor
(
(
np
.
random
.
random
([
args
.
batch_size
,
3
,
224
,
224
])
*
255
).
astype
(
data_type
)
)
fun
(
data
)
if
args
.
inputs
==
""
:
fun
.
dump
(
args
.
dir
+
"/resnet50_b"
+
str
(
args
.
batch_size
)
+
"_"
+
args
.
dtype
+
"_without_data.mge"
,
arg_names
=
[
"data"
],
no_assert
=
True
,
enable_nchw4
=
args
.
enable_nchw4
,
enable_chwn4
=
args
.
enable_chwn4
,
)
else
:
fun
.
dump
(
args
.
dir
+
"/resnet50_b"
+
str
(
args
.
batch_size
)
+
"_"
+
args
.
dtype
+
"_with_data.mge"
,
arg_names
=
[
"data"
],
input_data
=
[
args
.
inputs
],
no_assert
=
True
,
enable_nchw4
=
args
.
enable_nchw4
,
)
else
:
raise
TypeError
(
"dtype should be float32"
)
\ No newline at end of file
lite/load_and_run/src/options/device_options.cpp
浏览文件 @
4adba378
...
...
@@ -148,7 +148,9 @@ XPUDeviceOption::XPUDeviceOption() {
}
if
(
!
FLAGS_multi_thread_core_ids
.
empty
())
{
mgb_assert
(
enable_multithread
,
"core ids should be set after --multithread"
);
mgb_assert
(
enable_multithread
||
enable_multithread_default
,
"core ids should be set after --multithread or --multithread-default"
);
std
::
stringstream
id_stream
(
FLAGS_multi_thread_core_ids
);
std
::
string
id
;
size_t
thread_cnt
=
0
;
...
...
lite/load_and_run/src/options/extern_c_opr_options.cpp
浏览文件 @
4adba378
...
...
@@ -126,7 +126,9 @@ void COprLibOption::init_extern_param(std::shared_ptr<ModelBase> model_ptr) {
void
COprLibOption
::
load_lib
()
{
auto
handle
=
dlopen
(
lib_path
.
c_str
(),
RTLD_LAZY
);
mgb_assert
(
handle
,
"failed to open c opr lib %s: %s"
,
lib_path
.
c_str
(),
dlerror
());
mgb_assert
(
handle
,
"failed to open c opr lib %s:
\n
errmsg: %s"
,
lib_path
.
c_str
(),
dlerror
());
const
char
*
entry
=
MGB_C_OPR_INIT_FUNC_STR
;
auto
func
=
dlsym
(
handle
,
entry
);
...
...
lite/load_and_run/src/options/io_options.cpp
浏览文件 @
4adba378
...
...
@@ -286,8 +286,13 @@ DEFINE_string(
DEFINE_string
(
io_dump
,
""
,
"set the io dump file path in text format"
);
DEFINE_bool
(
io_dump_stdout
,
false
,
"dump io opr to stdout in text format"
);
DEFINE_bool
(
io_dump_stderr
,
false
,
"dump io opr to stderr in text format"
);
DEFINE_string
(
bin_io_dump
,
""
,
"set the io dump file path in binary format"
);
DEFINE_string
(
bin_out_dump
,
""
,
"set the out dump file path in binary format"
);
DEFINE_string
(
bin_io_dump
,
""
,
"set the io dump directory path where variable in binary format located"
);
DEFINE_string
(
bin_out_dump
,
""
,
"set the out dump directory path where output variable in binary format "
"located"
);
DEFINE_bool
(
copy_to_host
,
false
,
"copy device data to host"
);
REGIST_OPTION_CREATOR
(
input
,
lar
::
InputOption
::
create_option
);
...
...
lite/load_and_run/src/options/optimize_options.cpp
浏览文件 @
4adba378
...
...
@@ -77,12 +77,6 @@ void WeightPreprocessOption::config_model_internel<ModelLite>(
if
(
weight_preprocess
)
{
LITE_WARN
(
"enable weight-preprocess optimization"
);
model
->
get_config
().
options
.
weight_preprocess
=
true
;
//! FIXME: algo searcher enable weight preprocess for opencl(
//! implement below has some problem);
// #if MGB_OPENCL
// megdnn::opencl::algo_searcher::AlgoSearcherBase::
// enable_weight_preprocess();
// #endif
}
}
}
...
...
@@ -95,11 +89,6 @@ void WeightPreprocessOption::config_model_internel<ModelMdl>(
if
(
weight_preprocess
)
{
mgb_log_warn
(
"enable weight-preprocess optimization"
);
graph_option
.
graph_opt
.
enable_weight_preprocess
();
//! FIXME: this implemment is not right
// #if MGB_OPENCL
// megdnn::opencl::algo_searcher::AlgoSearcherBase::
// enable_weight_preprocess();
// #endif
}
}
}
...
...
@@ -570,12 +559,6 @@ DEFINE_bool(
enable_jit
,
false
,
" Execute supported operators with JIT(now only support NVRTC). "
"Can only be used on Nvidia GPUs"
);
#if MGB_ENABLE_ANDROID_NN
DEFINE_bool
(
android_nn
,
false
,
"Execute supported operators with Android NN. Can only be used "
"with --cpu."
);
#endif
#if MGB_ENABLE_TENSOR_RT
DEFINE_bool
(
tensorrt
,
false
,
...
...
lite/load_and_run/src/options/plugin_options.cpp
浏览文件 @
4adba378
...
...
@@ -8,10 +8,10 @@
*/
#include "plugin_options.h"
#include <map>
#include "misc.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"
///////////////////// Plugin options///////////////////////////
namespace
lar
{
...
...
@@ -153,7 +153,12 @@ void DebugOption::format_and_print(
auto
table
=
mgb
::
TextTable
(
tablename
);
auto
&&
network
=
model
->
get_lite_network
();
table
.
padding
(
1
);
table
.
align
(
mgb
::
TextTable
::
Align
::
Mid
).
add
(
"type"
).
add
(
"name"
).
add
(
"shape"
).
eor
();
table
.
align
(
mgb
::
TextTable
::
Align
::
Mid
)
.
add
(
"type"
)
.
add
(
"name"
)
.
add
(
"shape"
)
.
add
(
"dtype"
)
.
eor
();
auto
to_string
=
[
&
](
lite
::
Layout
&
layout
)
{
std
::
string
shape
(
"{"
);
...
...
@@ -165,6 +170,19 @@ void DebugOption::format_and_print(
shape
.
append
(
"}"
);
return
shape
;
};
auto
get_dtype
=
[
&
](
lite
::
Layout
&
layout
)
{
std
::
map
<
LiteDataType
,
std
::
string
>
type_map
=
{
{
LiteDataType
::
LITE_FLOAT
,
"float32"
},
{
LiteDataType
::
LITE_HALF
,
"float16"
},
{
LiteDataType
::
LITE_INT64
,
"int64"
},
{
LiteDataType
::
LITE_INT
,
"int32"
},
{
LiteDataType
::
LITE_UINT
,
"uint32"
},
{
LiteDataType
::
LITE_INT16
,
"int16"
},
{
LiteDataType
::
LITE_UINT16
,
"uint16"
},
{
LiteDataType
::
LITE_INT8
,
"int8"
},
{
LiteDataType
::
LITE_UINT8
,
"uint8"
}};
return
type_map
[
layout
.
data_type
];
};
auto
input_name
=
network
->
get_all_input_name
();
for
(
auto
&
i
:
input_name
)
{
...
...
@@ -173,6 +191,7 @@ void DebugOption::format_and_print(
.
add
(
"INPUT"
)
.
add
(
i
)
.
add
(
to_string
(
layout
))
.
add
(
get_dtype
(
layout
))
.
eor
();
}
...
...
@@ -183,6 +202,7 @@ void DebugOption::format_and_print(
.
add
(
"OUTPUT"
)
.
add
(
i
)
.
add
(
to_string
(
layout
))
.
add
(
get_dtype
(
layout
))
.
eor
();
}
...
...
@@ -196,13 +216,28 @@ void DebugOption::format_and_print(
const
std
::
string
&
tablename
,
std
::
shared_ptr
<
ModelMdl
>
model
)
{
auto
table
=
mgb
::
TextTable
(
tablename
);
table
.
padding
(
1
);
table
.
align
(
mgb
::
TextTable
::
Align
::
Mid
).
add
(
"type"
).
add
(
"name"
).
add
(
"shape"
).
eor
();
table
.
align
(
mgb
::
TextTable
::
Align
::
Mid
)
.
add
(
"type"
)
.
add
(
"name"
)
.
add
(
"shape"
)
.
add
(
"dtype"
)
.
eor
();
auto
get_dtype
=
[
&
](
megdnn
::
DType
data_type
)
{
std
::
map
<
megdnn
::
DTypeEnum
,
std
::
string
>
type_map
=
{
{
mgb
::
dtype
::
Float32
().
enumv
(),
"float32"
},
{
mgb
::
dtype
::
Int32
().
enumv
(),
"int32"
},
{
mgb
::
dtype
::
Int16
().
enumv
(),
"int16"
},
{
mgb
::
dtype
::
Uint16
().
enumv
(),
"uint16"
},
{
mgb
::
dtype
::
Int8
().
enumv
(),
"int8"
},
{
mgb
::
dtype
::
Uint8
().
enumv
(),
"uint8"
}};
return
type_map
[
data_type
.
enumv
()];
};
for
(
auto
&&
i
:
model
->
get_mdl_load_result
().
tensor_map
)
{
table
.
align
(
mgb
::
TextTable
::
Align
::
Mid
)
.
add
(
"INPUT"
)
.
add
(
i
.
first
)
.
add
(
i
.
second
->
shape
().
to_string
())
.
add
(
get_dtype
(
i
.
second
->
dtype
()))
.
eor
();
}
...
...
@@ -211,6 +246,7 @@ void DebugOption::format_and_print(
.
add
(
"OUTPUT"
)
.
add
(
i
.
node
()
->
name
())
.
add
(
i
.
shape
().
to_string
())
.
add
(
get_dtype
(
i
.
dtype
()))
.
eor
();
}
...
...
@@ -358,18 +394,22 @@ DEFINE_double(
DEFINE_bool
(
check_dispatch
,
false
,
"check whether an operator call dispatch on cpu comp nodes"
);
"check whether an operator call dispatch on cpu comp nodes This is used to "
"find potential bugs in MegDNN"
);
DEFINE_string
(
check_var_value
,
""
,
"--check-var-value [interval]|[interval:init_idx], Enable "
"VarValueChecker plugin. Refer to its doc for more details"
);
"VarValueChecker plugin. check values of all vars in a graph from given var "
"ID(init_idx) with step interval"
);
#if MGB_ENABLE_JSON
DEFINE_string
(
profile
,
""
,
"Write profiling result to given file. The output file is in "
"JSON format"
);
DEFINE_string
(
profile_host
,
""
,
"focus on host time profiling For some backends"
);
DEFINE_string
(
profile_host
,
""
,
"focus on host time profiling For some backends(such as openCL)"
);
#endif
///////////////////// Debug gflags///////////////////////////
...
...
lite/load_and_run/src/strategys/strategy_normal.cpp
浏览文件 @
4adba378
...
...
@@ -94,15 +94,16 @@ void NormalStrategy::run_subline() {
m_runtime_param
.
stage
=
RunStage
::
AFTER_RUNNING_WAIT
;
stage_config_model
();
auto
cur
=
timer
.
get_msecs
();
printf
(
"iter %lu/%lu: %.3fms (exec=%.3fms)
\n
"
,
i
,
run_num
,
cur
,
exec_time
);
printf
(
"iter %lu/%lu: %.3f ms (exec=%.3f ms)
\n
"
,
i
,
run_num
,
cur
,
exec_time
);
time_sum
+=
cur
;
time_sqrsum
+=
cur
*
cur
;
fflush
(
stdout
);
min_time
=
std
::
min
(
min_time
,
cur
);
max_time
=
std
::
max
(
max_time
,
cur
);
}
printf
(
"
\n
=== finished test #%u: time=%.3f
ms avg_time=%.3f
ms "
"sexec=%.3f
ms min=%.3fms max=%.3f
ms
\n\n
"
,
printf
(
"
\n
=== finished test #%u: time=%.3f
ms avg_time=%.3f
ms "
"sexec=%.3f
ms min=%.3f ms max=%.3f
ms
\n\n
"
,
idx
,
time_sum
,
time_sum
/
run_num
,
std
::
sqrt
(
(
time_sqrsum
*
run_num
-
time_sum
*
time_sum
)
/
...
...
@@ -121,7 +122,7 @@ void NormalStrategy::run_subline() {
m_runtime_param
.
stage
=
RunStage
::
MODEL_RUNNING
;
stage_config_model
();
if
(
!
idx
)
{
if
(
idx
==
0
)
{
warm_up
();
}
tot_time
+=
run_iter
(
idx
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录