Commit 4adba378 authored by Megvii Engine Team

feat(lite): add example scripts and some small changes for lar

GitOrigin-RevId: a28ed2f27a8d9fe8fb1ed2960477aeffd35f1857
Parent 87f00232
# load and run example scripts
These are some typical examples of using load_and_run. You can use
```bash
./example.sh
```
to run those examples; more details are in the script.
The script's usage can be shown by running it with:
```bash
./example.sh -h
```
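For example, a typical first pass on an x86 host might be (these flags are the ones listed in the script's usage):
```bash
./example.sh -p    # prepare the example models and input data
./example.sh -b    # build load_and_run for x86/armv7/arm64 and CUDA
./example.sh -e io # run only the io-related examples
```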
#!/bin/bash
OLD_IFS="$IFS"
IFS=$'\n'
TYPICAL_MODEL_DATD="model_source/resnet50_b1_float32_without_data.mge --input \"data:input_data/resnet50_input.npy\""
DEVICE_DESC=""
WORK_DIR_PATH="."
RUN_ARM_DEVICE="false"
RUN_TARGET="diff_model"
ONLY_PREPARE_MODEL="false"
MODEL_PREAPRED="false"
ONLY_BUILD="false"
LAR_BUILT="false"
CLEAN_ALL="false"
RUN_TARGETS=("diff_model")
RUN_TARGETS+=("diff_device")
RUN_TARGETS+=("fast_run")
RUN_TARGETS+=("io")
RUN_TARGETS+=("layout")
RUN_TARGETS+=("optimize")
RUN_TARGETS+=("plugin")
RUN_TARGETS+=("all")
function usage() {
echo "$0 args1 args2 .."
echo "available args detail:"
echo "-p : prepare example model "
echo "-b : build load_and_run for x86/armv7/arm64 cpu and CUDA"
echo "-t : set the ssh arm device "
echo "-w : set the arm device workspace dir"
echo "-c : clean all"
echo "-a : run all test"
echo "-e : set the running target for test (details use \"-e\" to see)"
echo "-h : show usage"
exit 1
}
while getopts "pbcahe:w:t:" arg
do
case $arg in
t)
DEVICE_DESC=$OPTARG
RUN_ARM_DEVICE="true"
echo "config arm device DEVICE_DESC to ${DEVICE_DESC}"
;;
w)
WORK_DIR_PATH=$OPTARG
echo "config arm device WORK_DIR_PATH to ${WORK_DIR_PATH}"
;;
e)
tmp_target=null
for target in ${RUN_TARGETS[@]}; do
if [ "$target" = "$OPTARG" ]; then
echo "CONFIG BUILD RUN_TARGET to : $OPTARG"
tmp_target=$OPTARG
RUN_TARGET=$OPTARG
break
fi
done
if [ "$tmp_target" = "null" ]; then
echo "ERR args for target (-e)"
echo "available target usage :"
for target in ${RUN_TARGETS[@]}; do
echo " -e $target"
done
exit 1
fi
;;
h)
echo "show usage"
usage
;;
a)
echo "config RUN_TARGET=all"
RUN_TARGET="all"
;;
c)
echo "clean all directory generated by script"
CLEAN_ALL="true"
;;
b)
echo "run build"
ONLY_BUILD="true"
;;
p)
echo "prepare model and input"
ONLY_PREPARE_MODEL="true"
;;
?)
echo "unkonw argument"
usage
;;
esac
done
function prepare_model_and_data(){
rm -rf model_source && mkdir model_source
# dump mgb model
python3 script/resnet50_mgb.py -o model_source/resnet50.pkl
../dump_with_testcase.py model_source/resnet50.pkl -o model_source/resnet50_with_data.mgb -d "#rand(0, 255)" --no-assert
# prepare simple add model
python3 script/add_demo.py --dir model_source
python3 script/conv_demo.py --dir model_source
#generate trt model
script/gen_trt_model.sh
#prepare mge model
python3 script/resnet50_mge.py --dir model_source
python3 script/resnet50_mge.py --dir model_source -d int8
python3 script/resnet50_mge.py --dir model_source --inputs "#rand(0,255)"
#make input_data
rm -rf input_data && mkdir input_data
python3 script/mge_input_data.py
rm -rf tmpdir && mkdir tmpdir
}
function build_lar(){
# build cpu and cuda version
../../../scripts/cmake-build/host_build.sh -r -t -e load_and_run
# WARNING: configure the CUDA environment before compiling
../../../scripts/cmake-build/host_build.sh -c -t -e load_and_run
# build arm version
../../../scripts/cmake-build/cross_build_android_arm_inference.sh -r -a arm64-v8a -e load_and_run
../../../scripts/cmake-build/cross_build_android_arm_inference.sh -r -a armeabi-v7a -e load_and_run
# symlink the host binaries for testing
ln -s ../../../build_dir/host/MGE_WITH_CUDA_OFF/MGE_INFERENCE_ONLY_OFF/Release/build/lite/load_and_run/load_and_run lar_cpu
ln -s ../../../build_dir/host/MGE_WITH_CUDA_ON/MGE_INFERENCE_ONLY_OFF/Release/build/lite/load_and_run/load_and_run lar_cuda
cp ../../../build_dir/android/arm64-v8a/Release/build/lite/load_and_run/load_and_run ./lar_arm64
cp ../../../build_dir/android/armeabi-v7a/Release/build/lite/load_and_run/load_and_run ./lar_armv7
}
function set_arm_device_and_upload(){
DEVICE_DESC="${1}"
WORK_DIR_PATH="${2}"
RUN_ARM_DEVICE="${3}"
cmd="rsync -aP -zz ./lar_arm64 ./lar_armv7 model_source/resnet50_b1_float32_without_data.mge input_data/resnet50_input.npy $DEVICE_DESC:$WORK_DIR_PATH/"
echo $cmd
bash -c "$cmd"
}
function test_different_model(){
CmdArray=("./lar_cpu model_source/resnet50_with_data.mgb")
CmdArray+=("./lar_cpu model_source/resnet50_b1_float32_with_data.mge")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --lite")
for cmd in ${CmdArray[@]}; do
echo "$cmd"
bash -c "$cmd"
done
}
function test_different_device(){
#on dispatch, compute tasks are pushed into a work queue that manages their execution; mean 131.278 ms, stddev 15.197 ms (m_asyc_exec runs asynchronously)
CmdArray=("./lar_cpu $TYPICAL_MODEL_DATD --cpu")
#on dispatch, compute tasks are executed directly; mean 131.875 ms, stddev 7.758 ms (m_asyc_exec runs synchronously)
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --cpu-default")
#multithreaded run, average time (ms) with 1~8 threads: 129.611, 84.266, 76.963, 55.212, 69.283, 77.338, 58.386, 64.585
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --multithread 4")
#pin the main thread to a core and run the other tasks on thread-pool threads: 132.614, 83.095, 69.792, 54.452, 48.890, 48.206, 46.386, 53.908
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --multithread-default 4")
#bind cpu cores for multithreading (core binding has little effect on x86)
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --multithread 2 --multi-thread-core-ids 1,5")
#run xpu comp nodes on cpu: 132.740 ms (comp_node: cpu)
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --cpu")
#run xpu comp nodes on cuda: 6.495 ms (comp_node: gpu)
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --cuda")
for cmd in ${CmdArray[@]}; do
echo $cmd
bash -c "$cmd"
done
}
function test_fast_run(){
CmdArray=("./lar_cuda $TYPICAL_MODEL_DATD --fast-run")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --full-run")
#fast run: search for algorithms and save the policy to a cache file
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --fast-run --fast-run-algo-policy tmpdir/algo_cache_file")
#fast run: execute with the cached algorithm policy
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --fast-run-algo-policy tmpdir/algo_cache_file")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --fast-run --reproducible")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --fast-run --fast-run-shared-batch-size 1")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --fast-run --binary-equal-between-batch")
for cmd in ${CmdArray[@]}; do
echo $cmd
bash -c "$cmd"
done
}
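# NOTE: --input accepts several data formats, all exercised below:
# .npy / .json files, image files (e.g. .ppm), and literal data such as "data:[2,3,4]"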
function test_io(){
rm -rf tmpdir/bin_io_info tmpdir/bin_out_info tmpdir/bin_io_info_cuda tmpdir/io_info.txt
mkdir tmpdir/bin_io_info tmpdir/bin_out_info tmpdir/bin_io_info_cuda
CmdArray=("./lar_cpu $TYPICAL_MODEL_DATD")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"input_data/add_demo_input.json\"")
#! the model must support input with nhwc shape
CmdArray+=("./lar_cpu model_source/resnet50_b1_int8_without_data.mge --input \"data:input_data/cat.ppm\"")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\"")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --io-dump tmpdir/io_info.txt --iter 1 --warmup-iter 0")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --io-dump-stdout --iter 1 --warmup-iter 0")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --io-dump-stderr --iter 1 --warmup-iter 0")
#each var is dumped to a separate file in the given directory; the file name is the var id, matching the text io-dump information
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --bin-io-dump tmpdir/bin_io_info --iter 1 --warmup-iter 0")
CmdArray+=("./lar_cuda model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --bin-io-dump tmpdir/bin_io_info_cuda --iter 1 --warmup-iter 0")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --bin-out-dump tmpdir/bin_out_info --iter 1 --warmup-iter 0")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --copy-to-host")
for cmd in ${CmdArray[@]}; do
echo $cmd
bash -c "$cmd"
done
#compare the binary io information
python3 ../../../imperative/python/megengine/tools/compare_binary_iodump.py tmpdir/bin_io_info tmpdir/bin_io_info_cuda
}
function test_layout_related(){
# very little speed up
CmdArray=("./lar_cuda $TYPICAL_MODEL_DATD --enable-nchw4")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --enable-chwn4")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --enable-nchw32")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --enable-nchw64")
#speed up
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --enable-nchw88")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --cuda --layout-transform cuda")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --cuda --layout-transform cuda --layout-transform-dump model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cuda.mge")
CmdArray+=("./lar_cuda model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cuda.mge")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --cpu --layout-transform cpu")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --cpu --layout-transform cpu --layout-transform-dump model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cpu.mge")
CmdArray+=("./lar_cpu model_source/resnet50_b1_float32_without_data_with_global_layout_trans_cpu.mge")
for cmd in ${CmdArray[@]}; do
echo $cmd
bash -c "$cmd"
done
if [ ${RUN_ARM_DEVICE} == "true" ] ; then
#speed up
CmdArray=("./lar_arm64 resnet50_b1_float32_without_data.mge --input \"data:resnet50_input.npy\" --cpu --enable-nchw44")
#speed up
CmdArray+=("./lar_arm64 resnet50_b1_float32_without_data.mge --input \"data:resnet50_input.npy\" --cpu --enable-nchw44-dot")
for cmd in ${CmdArray[@]}; do
echo $cmd
ssh -t $DEVICE_DESC "unset LD_PRELOAD && cd $WORK_DIR_PATH && LD_LIBRARY_PATH=./ $cmd"
done
else
echo "SET arm device ON : $RUN_ARM_DEVICE"
fi
}
function test_optimize(){
CmdArray=("./lar_cpu $TYPICAL_MODEL_DATD --enable-fuse-preprocess")
#speed up (observed after warm-up)
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --enable-fuse-conv-bias-nonlinearity")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --enable-fuse-conv-bias-with-z")
CmdArray+=("./lar_cuda model_source/trt_conv_demo_with_data.mgb --tensorrt")
CmdArray+=("./lar_cuda model_source/trt_conv_demo_with_data.mgb --tensorrt --tensorrt-cache tmpdir/TRT_cache")
CmdArray+=("./lar_cuda model_source/trt_conv_demo_with_data.mgb --tensorrt-cache tmpdir/TRT_cache")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --no-sanity-check --record-comp-seq2")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --disable_mem_opt")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --workspace_limit 10000")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --fake-first")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --enable_jit ")
for cmd in ${CmdArray[@]}; do
echo $cmd
bash -c "$cmd"
done
}
function test_plugin(){
rm -rf tmpdir/staticMemInfoDir tmpdir/staticMemInfoDirLogs
mkdir tmpdir/staticMemInfoDir
CmdArray=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --check-dispatch")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --check-var-value 5:0")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --range 2")
CmdArray+=("./lar_cpu model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --profile tmpdir/opr_profile.json
")
CmdArray+=("./lar_cuda model_source/add_demo_f32_without_data.mge --input \"data:[2,3,4]\" --profile-host tmpdir/opr_profile_host.json")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --model-info")
CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --verbose")
CmdArray+=("./lar_cpu model_source/resnet50_with_data.mgb --disable-assert-throw")
# wait gdb attach to given PID
# CmdArray+=("./lar_cpu $TYPICAL_MODEL_DATD --wait-gdb")
CmdArray+=("./lar_cuda $TYPICAL_MODEL_DATD --get-static-mem-info tmpdir/staticMemInfoDir")
for cmd in ${CmdArray[@]}; do
echo $cmd
bash -c "$cmd"
done
# view the graph at the given url (usually: http://localhost:6006/)
# mkdir tmpdir/staticMemInfoDirLogs && python3 ../../../imperative/python/megengine/tools/graph_info_analyze.py -i tmpdir/staticMemInfoDir -o tmpdir/staticMemInfoDirLogs
# pip3 install tensorboard && tensorboard --logdir tmpdir/staticMemInfoDirLogs
}
function clean(){
rm -rf tmpdir model_source input_data lar_cpu lar_cuda lar_arm64 lar_armv7
}
function main(){
if [ ${CLEAN_ALL} == "true" ] ; then
clean
exit 0
fi
if [ ${ONLY_PREPARE_MODEL} == "true" ] ; then
prepare_model_and_data
MODEL_PREAPRED="true"
exit 0
fi
if [ ${ONLY_BUILD} == "true" ] ; then
build_lar
LAR_BUILT="true"
exit 0
fi
if [ ${RUN_ARM_DEVICE} == "true" ] ; then
set_arm_device_and_upload $DEVICE_DESC "$WORK_DIR_PATH" "true"
fi
if [ ${MODEL_PREAPRED} != "true" ] ; then
CHECK_MODEL=$(find . -name add_demo_input.json)
if [ "${CHECK_MODEL}" == "" ] ; then
prepare_model_and_data
MODEL_PREAPRED="true"
fi
fi
if [ ${LAR_BUILT} != "true" ] ; then
CHECK_LAR=$(find . -name lar_armv7)
if [ "${CHECK_LAR}" == "" ] ; then
build_lar
LAR_BUILT="true"
fi
fi
if [ ${RUN_TARGET} == "diff_model" -o ${RUN_TARGET} == "all" ] ; then
test_different_model
fi
if [ ${RUN_TARGET} == "diff_device" -o ${RUN_TARGET} == "all" ] ; then
test_different_device
fi
if [ ${RUN_TARGET} == "fast_run" -o ${RUN_TARGET} == "all" ] ; then
test_fast_run
fi
if [ ${RUN_TARGET} == "io" -o ${RUN_TARGET} == "all" ] ; then
test_io
fi
if [ ${RUN_TARGET} == "layout" -o ${RUN_TARGET} == "all" ] ; then
test_layout_related
fi
if [ ${RUN_TARGET} == "optimize" -o ${RUN_TARGET} == "all" ] ; then
test_optimize
fi
if [ ${RUN_TARGET} == "plugin" -o ${RUN_TARGET} == "all" ] ; then
test_plugin
fi
}
main
IFS=$OLD_IFS
\ No newline at end of file
#!/usr/bin/env python3
import argparse
import math
import megengine.functional as F
import megengine.hub as hub
import megengine.module as M
import numpy as np
from megengine import jit, tensor, Parameter
class Simple(M.Module):
    def __init__(self):
        super().__init__()
        self.a = Parameter([0, 1, 2], dtype=np.float32)

    def forward(self, x):
        x = x + self.a
        return x

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="dump mge model for add_demo",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--inputs",
        help="set the inputs data to get a model with testcase",
        default="",
        type=str,
    )
    parser.add_argument(
        "--dir",
        help="set the dir where the model is dumped",
        default=".",
        type=str,
    )
    args = parser.parse_args()

    net = Simple()
    net.eval()

    @jit.trace(symbolic=True, capture_as_const=True)
    def fun(data):
        return net(data)

    data = tensor([3, 4, 5])
    fun(data)
    if args.inputs == "":
        fun.dump(
            args.dir + "/add_demo_f32_without_data.mge", arg_names=["data"],
            no_assert=True,
        )
    else:
        fun.dump(
            args.dir + "/add_demo_f32_with_data.mge", arg_names=["data"],
            input_data=[args.inputs], no_assert=True,
        )
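    # The dumped model can then be fed to load_and_run, for example
    # (as exercised by test_io in example.sh):
    #   ./lar_cpu add_demo_f32_without_data.mge --input "data:[2,3,4]"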
\ No newline at end of file
#!/usr/bin/env python3
import argparse
import math
import megengine.functional as F
import megengine.module as M
import numpy as np
from megengine import jit, tensor
class ConvNet(M.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = M.Conv2d(in_channels=3, out_channels=1, kernel_size=3, bias=False)

    def forward(self, input):
        x = self.conv1(input)
        return x

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="dump mge model for conv_demo",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "--dir",
        help="set the dir where the model is dumped",
        default=".",
        type=str,
    )
    args = parser.parse_args()

    net = ConvNet()
    net.eval()

    @jit.trace(symbolic=True, capture_as_const=True)
    def fun(data):
        return net(data)

    inp = tensor(np.arange(0, 96).astype("float32").reshape(2, 3, 4, 4))
    out = fun(inp)
    fun.dump(args.dir + "/conv_demo_f32_without_data.mge", arg_names=["data"],
             no_assert=True)
\ No newline at end of file
#!/usr/bin/env python3
from megskull.network import RawNetworkBuilder
import megskull.opr.all as O
from megskull.opr.external import TensorRTRuntimeOpr
from meghair.utils.io import dump
import argparse
def str2tuple(x):
    x = x.split(',')
    x = [int(a) for a in x]
    x = tuple(x)
    return x

def make_network(model, isizes):
    data = [O.DataProvider('input{}'.format(i), shape=isizes[i])
            for i in range(len(isizes))]
    with open(model, 'rb') as f:
        engine = f.read()
    opr = TensorRTRuntimeOpr(data, engine, 1)
    net = RawNetworkBuilder(inputs=data, outputs=opr.outputs)
    return net

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(dest='model')
    parser.add_argument(dest='output')
    parser.add_argument('--isize', help='input sizes. '
                        'e.g. for models with two (1,3,224,224) inputs, '
                        'the option --isize="1,3,224,224;1,3,224,224" should be used')
    args = parser.parse_args()
    isizes = [str2tuple(x) for x in args.isize.split(';')]
    net = make_network(args.model, isizes)
    dump(net, args.output)
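    # Example invocation (as used by gen_trt_model.sh):
    #   python3 dump_trt.py conv_demo.trt trt_conv_demo.pkl --isize="1,3,4,4"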
\ No newline at end of file
#!/bin/bash
CUR_DIR="$( cd "$(dirname $0)" >/dev/null 2>&1 ; pwd -P )"
# find correct trtexec version, only works for internal ci and brainpp env setups
CUDA_VERSION=$(nvcc --version | grep -o "[0-9].\.[0-9]*" | head -n 1)
SEARCH_PATH=$(echo `which nvcc | xargs dirname`/../../)
TRT_CANDIDATE=$(find `cd $SEARCH_PATH; pwd` -name "trtexec" | grep "bin/trtexec" | grep $CUDA_VERSION)
TRT_CANDIDATE=${TRT_CANDIDATE%$'\n'*}
TRT_LIB_PATH=$(readlink -f "`dirname $TRT_CANDIDATE`/../lib")
MODELS_PATH=$(readlink -f "${CUR_DIR}/../model_source")
# generate mge model
rm -rf $MODELS_PATH/conv_demo_f32_without_data.mge
python3 ${CUR_DIR}/conv_demo.py --dir $MODELS_PATH
# generate caffe model with mge convert
# INSTALL mgeconvert:
# python3 -m pip install git+https://github.com/MegEngine/mgeconvert.git --user --install-option="--targets=caffe"
rm -rf $MODELS_PATH/conv_demo.prototxt $MODELS_PATH/conv_demo.caffemodel
convert mge_to_caffe -i $MODELS_PATH/conv_demo_f32_without_data.mge -c $MODELS_PATH/conv_demo.prototxt -b $MODELS_PATH/conv_demo.caffemodel
# generate trt model
rm -rf $MODELS_PATH/conv_demo.trt
echo "WARNING: config cudnn and cublas path before run trtexec"
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$TRT_LIB_PATH
echo $LD_LIBRARY_PATH
$TRT_CANDIDATE --deploy="$MODELS_PATH/conv_demo.prototxt" --model="$MODELS_PATH/conv_demo.caffemodel" --output="conv1_Convolution" --batch=1 --saveEngine="$MODELS_PATH/conv_demo.trt"
# redump trt model into mgb model
rm -rf $MODELS_PATH/trt_conv_demo.pkl $MODELS_PATH/trt_conv_demo_with_data.mgb
python3 $CUR_DIR/dump_trt.py $MODELS_PATH/conv_demo.trt $MODELS_PATH/trt_conv_demo.pkl --isize="1,3,4,4"
$CUR_DIR/../../dump_with_testcase.py $MODELS_PATH/trt_conv_demo.pkl -o $MODELS_PATH/trt_conv_demo_with_data.mgb -d "#rand(0, 255)" --no-assert
\ No newline at end of file
#!/usr/bin/env python3
import numpy as np
import cv2
import megengine.data.transform as T
import megengine.functional as F
import json
import urllib.request
url, filename = ("https://data.megengine.org.cn/images/cat.jpg", "input_data/cat.jpg")
try:
    # py2-style opener; falls back to the py3 API below
    urllib.URLopener().retrieve(url, filename)
except Exception:
    urllib.request.urlretrieve(url, filename)
# numpy data
data = np.random.rand(1, 3, 224, 224)
np.save("input_data/resnet50_input_uint8.npy", data.astype(np.uint8))
np.save("input_data/resnet50_input.npy", data.astype(np.float32))
# ppm data
image = cv2.imread("input_data/cat.jpg")
transform = T.Compose([
    T.Resize(256),
    T.CenterCrop(224),
])
processed_img = transform.apply(image)
cv2.imwrite("input_data/cat.ppm", processed_img)
# json
data_obj = {
    "shape": [1, 3],
    "type": "int32",
    "raw": [2, 3, 4],
}
with open("input_data/add_demo_input.json", "w") as f:
    json.dump({"data": data_obj}, f)
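# the resulting add_demo_input.json contains:
#   {"data": {"shape": [1, 3], "type": "int32", "raw": [2, 3, 4]}}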
\ No newline at end of file
#!/usr/bin/env python3
import megbrain as mgb
from megskull.graph import FpropEnv
import megskull as mgsk
from megskull.opr.compatible.caffepool import CaffePooling2D
from megskull.opr.arith import ReLU
from megskull.opr.all import (
DataProvider, Conv2D, Pooling2D, FullyConnected,
Softmax, Dropout, BatchNormalization, CrossEntropyLoss,
ElementwiseAffine, WarpPerspective, WarpPerspectiveWeightProducer,
WeightDecay, ParamProvider, ConvBiasActivation, ElemwiseMultiType)
from megskull.network import RawNetworkBuilder
from megskull.utils.debug import CallbackInjector
import megskull.opr.helper.param_init as pinit
from megskull.opr.helper.elemwise_trans import Identity
from megskull.opr.netsrc import DataProvider
from megskull.opr.cnn import Conv2D, Pooling2D, FullyConnected, Softmax, Conv2DImplHelper
from megskull.opr.loss import CrossEntropyLoss
from megskull.opr.regularizer import Dropout, BatchNormalization
from megskull.opr.arith import Add, ReLU
from megskull.opr.netsrc import ConstProvider
from megskull.network import RawNetworkBuilder
import numpy as np
from megskull.network import RawNetworkBuilder, NetworkVisitor
from megskull.graph import iter_dep_opr
from megskull.utils.misc import get_2dshape
import functools
import re
import fnmatch
import argparse
import sys
def create_bn_relu_float(conv_name, f_in, ksize, stride, pad, num_outputs,
                         has_relu, args):
    f = Conv2D(conv_name, f_in, kernel_shape=ksize, stride=stride,
               padding=pad, output_nr_channel=num_outputs,
               nonlinearity=mgsk.opr.helper.elemwise_trans.Identity())
    if has_relu:
        f = ReLU(f)
    return f

def get_num_inputs(feature, format):
    if format == 'NCHW':
        return feature.partial_shape[1]
    else:
        assert format == 'NCHW4'
        return feature.partial_shape[1] * 4

def create_bn_relu(prefix, f_in, ksize, stride, pad, num_outputs,
                   has_relu, conv_name_fun, args):
    if conv_name_fun:
        conv_name = conv_name_fun(prefix)
    else:
        conv_name = prefix
    return create_bn_relu_float(conv_name, f_in, ksize, stride, pad,
                                num_outputs, has_relu, args)

def create_bottleneck(prefix, f_in, stride, num_outputs1, num_outputs2, args,
                      has_proj=False):
    proj = f_in
    if has_proj:
        proj = create_bn_relu(prefix, f_in, ksize=1, stride=stride, pad=0,
                              num_outputs=num_outputs2, has_relu=False,
                              conv_name_fun=lambda p: "interstellar{}_branch1".format(p),
                              args=args)
    f = create_bn_relu(prefix, f_in, ksize=1, stride=1, pad=0,
                       num_outputs=num_outputs1, has_relu=True,
                       conv_name_fun=lambda p: "interstellar{}_branch2a".format(p),
                       args=args)
    f = create_bn_relu(prefix, f, ksize=3, stride=stride, pad=1,
                       num_outputs=num_outputs1, has_relu=True,
                       conv_name_fun=lambda p: "interstellar{}_branch2b".format(p),
                       args=args)
    f = create_bn_relu(prefix, f, ksize=1, stride=1, pad=0,
                       num_outputs=num_outputs2, has_relu=False,
                       conv_name_fun=lambda p: "interstellar{}_branch2c".format(p),
                       args=args)
    f = ReLU(f + proj)
    return f

def get(args):
    img_size = 224
    num_inputs = 3
    data = DataProvider('data', shape=(args.batch_size, num_inputs,
                                       img_size, img_size))
    inp = data
    f = create_bn_relu("conv1", inp, ksize=7, stride=2, pad=3, num_outputs=64,
                       has_relu=True, conv_name_fun=None, args=args)
    f = Pooling2D("pool1", f, window=3, stride=2, padding=1, mode="MAX",
                  format=args.format)
    pre = [2, 3, 4, 5]
    stages = [3, 4, 6, 3]
    mid_outputs = [64, 128, 256, 512]
    enable_stride = [False, True, True, True]
    for p, s, o, es in zip(pre, stages, mid_outputs, enable_stride):
        for i in range(s):
            has_proj = False if i > 0 else True
            stride = 1 if not es or i > 0 else 2
            prefix = "{}{}".format(p, chr(ord("a") + i))
            f = create_bottleneck(prefix, f, stride, o, o * 4, args, has_proj)
            print("{}\t{}".format(prefix, f.partial_shape))
    f = Pooling2D("pool5", f, window=7, stride=7, padding=0, mode="AVERAGE",
                  format=args.format)
    f = FullyConnected("fc1000", f, output_dim=1000,
                       nonlinearity=mgsk.opr.helper.elemwise_trans.Identity())
    f = Softmax("cls_softmax", f)
    f.init_weights()
    net = RawNetworkBuilder(inputs=[data], outputs=[f])
    return net

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='dump pkl model for resnet50',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-b', '--batch-size', type=int,
                        help='batch size of the model', default=1)
    parser.add_argument('-f', '--format', choices=['NCHW', 'NCHW4'],
                        help='format of conv',
                        default='NCHW')
    parser.add_argument('-o', '--output',
                        help='output pkl path', required=True)
    args = parser.parse_args()
    if args.format != 'NCHW':
        print('Only support NCHW for float model')
        parser.print_help()
        sys.exit(1)
    from meghair.utils import io
    io.dump(get(args), args.output)
#!/usr/bin/env python3
import argparse
import math
import megengine.functional as F
import megengine.hub as hub
import megengine.module as M
import numpy as np
from megengine import jit, tensor
class BasicBlock(M.Module):
    expansion = 1

    def __init__(
        self,
        in_channels,
        channels,
        stride=1,
        groups=1,
        base_width=64,
        dilation=1,
        norm=M.BatchNorm2d,
    ):
        super().__init__()
        if groups != 1 or base_width != 64:
            raise ValueError("BasicBlock only supports groups=1 and base_width=64")
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        self.conv1 = M.Conv2d(
            in_channels, channels, 3, stride, padding=dilation, bias=False
        )
        self.bn1 = norm(channels)
        self.conv2 = M.Conv2d(channels, channels, 3, 1, padding=1, bias=False)
        self.bn2 = norm(channels)
        self.downsample = (
            M.Identity()
            if in_channels == channels and stride == 1
            else M.Sequential(
                M.Conv2d(in_channels, channels, 1, stride, bias=False), norm(channels),
            )
        )

    def forward(self, x):
        identity = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        identity = self.downsample(identity)
        x += identity
        x = F.relu(x)
        return x

class Bottleneck(M.Module):
    expansion = 4

    def __init__(
        self,
        in_channels,
        channels,
        stride=1,
        groups=1,
        base_width=64,
        dilation=1,
        norm=M.BatchNorm2d,
    ):
        super().__init__()
        width = int(channels * (base_width / 64.0)) * groups
        self.conv1 = M.Conv2d(in_channels, width, 1, 1, bias=False)
        self.bn1 = norm(width)
        self.conv2 = M.Conv2d(
            width,
            width,
            3,
            stride,
            padding=dilation,
            groups=groups,
            dilation=dilation,
            bias=False,
        )
        self.bn2 = norm(width)
        self.conv3 = M.Conv2d(width, channels * self.expansion, 1, 1, bias=False)
        self.bn3 = norm(channels * self.expansion)
        self.downsample = (
            M.Identity()
            if in_channels == channels * self.expansion and stride == 1
            else M.Sequential(
                M.Conv2d(in_channels, channels * self.expansion, 1, stride, bias=False),
                norm(channels * self.expansion),
            )
        )

    def forward(self, x):
        identity = x
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = self.bn2(x)
        x = F.relu(x)
        x = self.conv3(x)
        x = self.bn3(x)
        identity = self.downsample(identity)
        x += identity
        x = F.relu(x)
        return x

class ResNet(M.Module):
    def __init__(
        self,
        block,
        layers,
        num_classes=1000,
        zero_init_residual=False,
        groups=1,
        width_per_group=64,
        replace_stride_with_dilation=None,
        norm=M.BatchNorm2d,
    ):
        super().__init__()
        self.in_channels = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError(
                "replace_stride_with_dilation should be None "
                "or a 3-element tuple, got {}".format(replace_stride_with_dilation)
            )
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = M.Conv2d(
            3, self.in_channels, kernel_size=7, stride=2, padding=3, bias=False
        )
        self.bn1 = norm(self.in_channels)
        self.maxpool = M.MaxPool2d(kernel_size=3, stride=2, padding=0)
        self.layer1 = self._make_layer(block, 64, layers[0], norm=norm)
        self.layer2 = self._make_layer(
            block,
            128,
            layers[1],
            stride=2,
            dilate=replace_stride_with_dilation[0],
            norm=norm,
        )
        self.layer3 = self._make_layer(
            block,
            256,
            layers[2],
            stride=2,
            dilate=replace_stride_with_dilation[1],
            norm=norm,
        )
        self.layer4 = self._make_layer(
            block,
            512,
            layers[3],
            stride=2,
            dilate=replace_stride_with_dilation[2],
            norm=norm,
        )
        self.fc = M.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, M.Conv2d):
                M.init.msra_normal_(m.weight, mode="fan_out", nonlinearity="relu")
                if m.bias is not None:
                    fan_in, _ = M.init.calculate_fan_in_and_fan_out(m.weight)
                    bound = 1 / math.sqrt(fan_in)
                    M.init.uniform_(m.bias, -bound, bound)
            elif isinstance(m, M.BatchNorm2d):
                M.init.ones_(m.weight)
                M.init.zeros_(m.bias)
            elif isinstance(m, M.Linear):
                M.init.msra_uniform_(m.weight, a=math.sqrt(5))
                if m.bias is not None:
                    fan_in, _ = M.init.calculate_fan_in_and_fan_out(m.weight)
                    bound = 1 / math.sqrt(fan_in)
                    M.init.uniform_(m.bias, -bound, bound)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block
        # behaves like an identity. According to https://arxiv.org/abs/1706.02677
        # This improves the model by 0.2~0.3%.
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    M.init.zeros_(m.bn3.weight)
                elif isinstance(m, BasicBlock):
                    M.init.zeros_(m.bn2.weight)

    def _make_layer(
        self, block, channels, blocks, stride=1, dilate=False, norm=M.BatchNorm2d
    ):
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        layers = []
        layers.append(
            block(
                self.in_channels,
                channels,
                stride,
                groups=self.groups,
                base_width=self.base_width,
                dilation=previous_dilation,
                norm=norm,
            )
        )
        self.in_channels = channels * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.in_channels,
                    channels,
                    groups=self.groups,
                    base_width=self.base_width,
                    dilation=self.dilation,
                    norm=norm,
                )
            )
        return M.Sequential(*layers)

    def extract_features(self, x):
        outputs = {}
        x = self.conv1(x)
        x = self.bn1(x)
        x = F.relu(x)
        x = self.maxpool(x)
        outputs["stem"] = x
        x = self.layer1(x)
        outputs["res2"] = x
        x = self.layer2(x)
        outputs["res3"] = x
        x = self.layer3(x)
        outputs["res4"] = x
        x = self.layer4(x)
        outputs["res5"] = x
        return outputs

    def forward(self, x):
        x = F.reshape(x, (1, 3, 224, 224))
        x = self.extract_features(x)["res5"]
        x = F.avg_pool2d(x, 7)
        x = F.flatten(x, 1)
        x = self.fc(x)
        return x

@hub.pretrained(
    "https://data.megengine.org.cn/models/weights/resnet50_fbaug_76254_4e14b7d1.pkl"
)
def resnet50(**kwargs):
    r"""ResNet-50 model from
    `"Deep Residual Learning for Image Recognition" <https://arxiv.org/pdf/1512.03385.pdf>`_
    """
    return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs)

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="dump mge model for resnet50",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-b", "--batch-size", help="batch size of the model", default=1, type=int
    )
    parser.add_argument(
        "-d",
        "--dtype",
        help="the dtype of the model, which can be float32 or uint8",
        default="float32",
        type=str,
    )
    parser.add_argument(
        "--inputs",
        help="set the inputs data to get a model with testcase",
        default="",
        type=str,
    )
    parser.add_argument(
        "--dir",
        help="set the dir where the model is dumped",
        default=".",
        type=str,
    )
    parser.add_argument(
        "--enable-nchw4",
        help="enable-nchw4 for NVIDIA CUDNN",
        action='store_true'
    )
    parser.add_argument(
        "--enable-chwn4",
        help="enable-chwn4 for NVIDIA CUDNN",
        action='store_true'
    )
    args = parser.parse_args()

    net = resnet50()
    net.eval()

    @jit.trace(symbolic=True, capture_as_const=True)
    def fun(data):
        return net(data)

    if args.dtype == "float32" or args.dtype == "uint8":
        data_type = np.float32  # float32 is the default
        if args.dtype == "uint8":
            data_type = np.uint8
        data = tensor(
            (np.random.random([args.batch_size, 3, 224, 224]) * 255).astype(data_type)
        )
        fun(data)
        if args.inputs == "":
            fun.dump(
                args.dir + "/resnet50_b" + str(args.batch_size) + "_" + args.dtype + "_without_data.mge",
                arg_names=["data"], no_assert=True,
                enable_nchw4=args.enable_nchw4, enable_chwn4=args.enable_chwn4,
            )
        else:
            fun.dump(
                args.dir + "/resnet50_b" + str(args.batch_size) + "_" + args.dtype + "_with_data.mge",
                arg_names=["data"], input_data=[args.inputs], no_assert=True,
                enable_nchw4=args.enable_nchw4,
            )
    else:
        raise TypeError("dtype should be float32 or uint8")
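    # Example invocations (as used by example.sh):
    #   python3 script/resnet50_mge.py --dir model_source
    #   python3 script/resnet50_mge.py --dir model_source --inputs "#rand(0,255)"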
\ No newline at end of file
......@@ -148,7 +148,9 @@ XPUDeviceOption::XPUDeviceOption() {
}
if (!FLAGS_multi_thread_core_ids.empty()) {
mgb_assert(enable_multithread, "core ids should be set after --multithread");
mgb_assert(
enable_multithread || enable_multithread_default,
"core ids should be set after --multithread or --multithread-default");
std::stringstream id_stream(FLAGS_multi_thread_core_ids);
std::string id;
size_t thread_cnt = 0;
......
......@@ -126,7 +126,9 @@ void COprLibOption::init_extern_param(std::shared_ptr<ModelBase> model_ptr) {
void COprLibOption::load_lib() {
auto handle = dlopen(lib_path.c_str(), RTLD_LAZY);
mgb_assert(handle, "failed to open c opr lib %s: %s", lib_path.c_str(), dlerror());
mgb_assert(
handle, "failed to open c opr lib %s:\n errmsg: %s", lib_path.c_str(),
dlerror());
const char* entry = MGB_C_OPR_INIT_FUNC_STR;
auto func = dlsym(handle, entry);
......
......@@ -286,8 +286,13 @@ DEFINE_string(
DEFINE_string(io_dump, "", "set the io dump file path in text format");
DEFINE_bool(io_dump_stdout, false, "dump io opr to stdout in text format");
DEFINE_bool(io_dump_stderr, false, "dump io opr to stderr in text format");
DEFINE_string(bin_io_dump, "", "set the io dump file path in binary format");
DEFINE_string(bin_out_dump, "", "set the out dump file path in binary format");
DEFINE_string(
bin_io_dump, "",
"set the io dump directory path where variable in binary format located");
DEFINE_string(
bin_out_dump, "",
"set the out dump directory path where output variable in binary format "
"located");
DEFINE_bool(copy_to_host, false, "copy device data to host");
REGIST_OPTION_CREATOR(input, lar::InputOption::create_option);
......
......@@ -77,12 +77,6 @@ void WeightPreprocessOption::config_model_internel<ModelLite>(
if (weight_preprocess) {
LITE_WARN("enable weight-preprocess optimization");
model->get_config().options.weight_preprocess = true;
//! FIXME: algo searcher enable weight preprocess for opencl(
//! implement below has some problem);
// #if MGB_OPENCL
// megdnn::opencl::algo_searcher::AlgoSearcherBase::
// enable_weight_preprocess();
// #endif
}
}
}
......@@ -95,11 +89,6 @@ void WeightPreprocessOption::config_model_internel<ModelMdl>(
if (weight_preprocess) {
mgb_log_warn("enable weight-preprocess optimization");
graph_option.graph_opt.enable_weight_preprocess();
//! FIXME: this implemment is not right
// #if MGB_OPENCL
// megdnn::opencl::algo_searcher::AlgoSearcherBase::
// enable_weight_preprocess();
// #endif
}
}
}
......@@ -570,12 +559,6 @@ DEFINE_bool(
enable_jit, false,
" Execute supported operators with JIT(now only support NVRTC). "
"Can only be used on Nvidia GPUs");
#if MGB_ENABLE_ANDROID_NN
DEFINE_bool(
android_nn, false,
"Execute supported operators with Android NN. Can only be used "
"with --cpu.");
#endif
#if MGB_ENABLE_TENSOR_RT
DEFINE_bool(
tensorrt, false,
......
......@@ -8,10 +8,10 @@
*/
#include "plugin_options.h"
#include <map>
#include "misc.h"
#include "models/model_lite.h"
#include "models/model_mdl.h"
///////////////////// Plugin options///////////////////////////
namespace lar {
......@@ -153,7 +153,12 @@ void DebugOption::format_and_print(
auto table = mgb::TextTable(tablename);
auto&& network = model->get_lite_network();
table.padding(1);
table.align(mgb::TextTable::Align::Mid).add("type").add("name").add("shape").eor();
table.align(mgb::TextTable::Align::Mid)
.add("type")
.add("name")
.add("shape")
.add("dtype")
.eor();
auto to_string = [&](lite::Layout& layout) {
std::string shape("{");
......@@ -165,6 +170,19 @@ void DebugOption::format_and_print(
shape.append("}");
return shape;
};
auto get_dtype = [&](lite::Layout& layout) {
std::map<LiteDataType, std::string> type_map = {
{LiteDataType::LITE_FLOAT, "float32"},
{LiteDataType::LITE_HALF, "float16"},
{LiteDataType::LITE_INT64, "int64"},
{LiteDataType::LITE_INT, "int32"},
{LiteDataType::LITE_UINT, "uint32"},
{LiteDataType::LITE_INT16, "int16"},
{LiteDataType::LITE_UINT16, "uint16"},
{LiteDataType::LITE_INT8, "int8"},
{LiteDataType::LITE_UINT8, "uint8"}};
return type_map[layout.data_type];
};
auto input_name = network->get_all_input_name();
for (auto& i : input_name) {
......@@ -173,6 +191,7 @@ void DebugOption::format_and_print(
.add("INPUT")
.add(i)
.add(to_string(layout))
.add(get_dtype(layout))
.eor();
}
......@@ -183,6 +202,7 @@ void DebugOption::format_and_print(
.add("OUTPUT")
.add(i)
.add(to_string(layout))
.add(get_dtype(layout))
.eor();
}
......@@ -196,13 +216,28 @@ void DebugOption::format_and_print(
const std::string& tablename, std::shared_ptr<ModelMdl> model) {
auto table = mgb::TextTable(tablename);
table.padding(1);
table.align(mgb::TextTable::Align::Mid).add("type").add("name").add("shape").eor();
table.align(mgb::TextTable::Align::Mid)
.add("type")
.add("name")
.add("shape")
.add("dtype")
.eor();
auto get_dtype = [&](megdnn::DType data_type) {
std::map<megdnn::DTypeEnum, std::string> type_map = {
{mgb::dtype::Float32().enumv(), "float32"},
{mgb::dtype::Int32().enumv(), "int32"},
{mgb::dtype::Int16().enumv(), "int16"},
{mgb::dtype::Uint16().enumv(), "uint16"},
{mgb::dtype::Int8().enumv(), "int8"},
{mgb::dtype::Uint8().enumv(), "uint8"}};
return type_map[data_type.enumv()];
};
for (auto&& i : model->get_mdl_load_result().tensor_map) {
table.align(mgb::TextTable::Align::Mid)
.add("INPUT")
.add(i.first)
.add(i.second->shape().to_string())
.add(get_dtype(i.second->dtype()))
.eor();
}
......@@ -211,6 +246,7 @@ void DebugOption::format_and_print(
.add("OUTPUT")
.add(i.node()->name())
.add(i.shape().to_string())
.add(get_dtype(i.dtype()))
.eor();
}
......@@ -358,18 +394,22 @@ DEFINE_double(
DEFINE_bool(
check_dispatch, false,
"check whether an operator call dispatch on cpu comp nodes");
"check whether an operator call dispatch on cpu comp nodes This is used to "
"find potential bugs in MegDNN");
DEFINE_string(
check_var_value, "",
"--check-var-value [interval]|[interval:init_idx], Enable "
"VarValueChecker plugin. Refer to its doc for more details");
"VarValueChecker plugin. check values of all vars in a graph from given var "
"ID(init_idx) with step interval");
#if MGB_ENABLE_JSON
DEFINE_string(
profile, "",
"Write profiling result to given file. The output file is in "
"JSON format");
DEFINE_string(profile_host, "", "focus on host time profiling For some backends");
DEFINE_string(
profile_host, "",
"focus on host time profiling For some backends(such as openCL)");
#endif
///////////////////// Debug gflags///////////////////////////
......
......@@ -94,15 +94,16 @@ void NormalStrategy::run_subline() {
m_runtime_param.stage = RunStage::AFTER_RUNNING_WAIT;
stage_config_model();
auto cur = timer.get_msecs();
printf("iter %lu/%lu: %.3fms (exec=%.3fms)\n", i, run_num, cur, exec_time);
printf("iter %lu/%lu: %.3f ms (exec=%.3f ms)\n", i, run_num, cur,
exec_time);
time_sum += cur;
time_sqrsum += cur * cur;
fflush(stdout);
min_time = std::min(min_time, cur);
max_time = std::max(max_time, cur);
}
printf("\n=== finished test #%u: time=%.3fms avg_time=%.3fms "
"sexec=%.3fms min=%.3fms max=%.3fms\n\n",
printf("\n=== finished test #%u: time=%.3f ms avg_time=%.3f ms "
"sexec=%.3f ms min=%.3f ms max=%.3f ms\n\n",
idx, time_sum, time_sum / run_num,
std::sqrt(
(time_sqrsum * run_num - time_sum * time_sum) /
......@@ -121,7 +122,7 @@ void NormalStrategy::run_subline() {
m_runtime_param.stage = RunStage::MODEL_RUNNING;
stage_config_model();
if (!idx) {
if (idx == 0) {
warm_up();
}
tot_time += run_iter(idx);
......