未验证 提交 76356ff6 编写于 作者: I iamWHTWD 提交者: GitHub

Update sa_nas_mobilenetv2.py

上级 fc0882b8
import sys #!/usr/bin/env bash
sys.path.append('..') ##################
import numpy as np #bash slim_ci_demo_all_case.sh $5 $6;
import argparse
import ast print_info(){
import time if [ $1 -ne 0 ];then
import argparse mv ${log_path}/$2 ${log_path}/FAIL_$2.log
import ast echo -e "\033[31m ${log_path}/FAIL_$2 \033[0m"
import logging echo "fail log as follow"
import paddle cat ${log_path}/FAIL_$2.log
import paddle.nn as nn else
import paddle.static as static mv ${log_path}/$2 ${log_path}/SUCCESS_$2.log
import paddle.nn.functional as F echo -e "\033[32m ${log_path}/SUCCESS_$2 \033[0m"
import paddle.vision.transforms as T cat ${log_path}/SUCCESS_$2.log
from paddle import ParamAttr fi
from paddleslim.analysis import flops }
from paddleslim.nas import SANAS
from paddleslim.common import get_logger catchException() {
from optimizer import create_optimizer echo $1 failed due to exception >> FAIL_Exception.log
import imagenet_reader }
_logger = get_logger(__name__, level=logging.INFO) cudaid1=$1;
cudaid2=$2;
echo "cudaid1,cudaid2", ${cudaid1}, ${cudaid2}
def build_program(main_program, export CUDA_VISIBLE_DEVICES=${cudaid1}
startup_program, #分布式log输出方式
image_shape, export PADDLE_LOG_LEVEL=debug
dataset,
archs, export FLAGS_fraction_of_gpu_memory_to_use=0.98
args, # data PaddleSlim/demo/data/ILSVRC2012
places, cd ${slim_dir}/demo
is_test=False): if [ -d "data" ];then
with static.program_guard(main_program, startup_program): rm -rf data
with paddle.utils.unique_name.guard(): fi
data_shape = [None] + image_shape wget -q https://sys-p0.bj.bcebos.com/slim_ci/ILSVRC2012_data_demo.tar.gz --no-check-certificate
data = static.data(name='data', shape=data_shape, dtype='float32') tar xf ILSVRC2012_data_demo.tar.gz
label = static.data(name='label', shape=[None, 1], dtype='int64') mv ILSVRC2012_data_demo data
if args.data == 'cifar10': # download pretrain model
paddle.assign(paddle.reshape(label, [-1, 1]), label) root_url="http://paddle-imagenet-models-name.bj.bcebos.com"
if is_test: pre_models="MobileNetV1 MobileNetV2 MobileNetV3_large_x1_0_ssld ResNet101_vd MobileNetV2 ResNet34 ResNet50 ResNet50_vd"
data_loader = paddle.io.DataLoader( if [ -d "pretrain" ];then
dataset, rm -rf pretrain
places=places, fi
feed_list=[data, label], mkdir pretrain && cd pretrain
drop_last=False, for model in ${pre_models}
batch_size=args.batch_size, do
return_list=False, if [ ! -f ${model} ]; then
shuffle=False) wget -q ${root_url}/${model}_pretrained.tar
else: tar xf ${model}_pretrained.tar
data_loader = paddle.io.DataLoader( fi
dataset, done
places=places,
feed_list=[data, label], # 1 dist
drop_last=True, demo_distillation_01(){
batch_size=args.batch_size, cd ${slim_dir}/demo/distillation || catchException demo_distillation
return_list=False, if [ -d "output" ];then
shuffle=True, rm -rf output
use_shared_memory=True, fi
num_workers=4) export CUDA_VISIBLE_DEVICES=${cudaid1}
output = archs(data) python distill.py --num_epochs 1 --save_inference True >${log_path}/demo_distillation_ResNet50_vd_T 2>&1
output = static.nn.fc(x=output, size=args.class_dim) print_info $? demo_distillation_ResNet50_vd_T
softmax_out = F.softmax(output) }
cost = F.cross_entropy(softmax_out, label=label)
avg_cost = paddle.mean(cost) demo_distillation_02(){
acc_top1 = paddle.metric.accuracy( cd ${slim_dir}/demo/distillation || catchException demo_distillation
input=softmax_out, label=label, k=1) if [ -d "output" ];then
acc_top5 = paddle.metric.accuracy( rm -rf output
input=softmax_out, label=label, k=5) fi
if is_test == False: export CUDA_VISIBLE_DEVICES=${cudaid1}
optimizer = create_optimizer(args) python distill.py --num_epochs 1 --batch_size 64 --save_inference True \
optimizer.minimize(avg_cost) --model ResNet50 --teacher_model ResNet101_vd \
return data_loader, avg_cost, acc_top1, acc_top5 --teacher_pretrained_model ../pretrain/ResNet101_vd_pretrained >${log_path}/demo_distillation_ResNet101_vd_ResNet50_T 2>&1
print_info $? demo_distillation_ResNet101_vd_ResNet50_T
def search_mobilenetv2(config, args, image_size, is_server=True): python distill.py --num_epochs 1 --batch_size 64 --save_inference True \
image_shape = [3, image_size, image_size] --model MobileNetV2_x0_25 --teacher_model MobileNetV2 \
if args.data == 'cifar10': --teacher_pretrained_model ../pretrain/MobileNetV2_pretrained >${log_path}/demo_distillation_MobileNetV2_MobileNetV2_x0_25_T 2>&1
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) print_info $? demo_distillation_MobileNetV2_MobileNetV2_x0_25_T
train_dataset = paddle.vision.datasets.Cifar10( }
mode='train', transform=transform, backend='cv2')
val_dataset = paddle.vision.datasets.Cifar10( demo_deep_mutual_learning(){
mode='test', transform=transform, backend='cv2') cd ${slim_dir}/demo/deep_mutual_learning || catchException demo_deep_mutual_learning
export CUDA_VISIBLE_DEVICES=${cudaid1}
elif args.data == 'imagenet': model=dml_mv1_mv1_gpu1
train_dataset = imagenet_reader.ImageNetDataset(mode='train') CUDA_VISIBLE_DEVICES=${cudaid1}
val_dataset = imagenet_reader.ImageNetDataset(mode='val') python dml_train.py --epochs 1 >${log_path}/${model} 2>&1
print_info $? ${model}
places = static.cuda_places() if args.use_gpu else static.cpu_places() model=dml_mv1_res50_gpu1
place = places[0] CUDA_VISIBLE_DEVICES=${cudaid1}
if is_server: python dml_train.py --models='mobilenet-resnet50' --batch_size 128 --epochs 1 >${log_path}/${model} 2>&1
### start a server and a client print_info $? ${model}
sa_nas = SANAS( }
config,
server_addr=(args.server_address, args.port), all_distillation(){ # 大数据 5个模型
search_steps=args.search_steps, demo_distillation_01 # 3
is_server=True) #demo_distillation_02
else: #demo_deep_mutual_learning # 2
### start a client }
sa_nas = SANAS( # 2.1 quant/quant_aware 使用小数据集即可
config, demo_quant_quant_aware(){
server_addr=(args.server_address, args.port), cd ${slim_dir}/demo/quant/quant_aware || catchException demo_quant_quant_aware
search_steps=args.search_steps, if [ -d "output" ];then
is_server=False) rm -rf output
fi
for step in range(args.search_steps): export CUDA_VISIBLE_DEVICES=${cudaid1}
archs = sa_nas.next_archs()[0] # 2.1版本时默认BS=256会报显存不足,故暂时修改成128
python train.py --model MobileNet --pretrained_model ../../pretrain/MobileNetV1_pretrained \
train_program = static.Program() --checkpoint_dir ./output/mobilenetv1 --num_epochs 1 --batch_size 128 >${log_path}/demo_quant_quant_aware_v1 2>&1
test_program = static.Program() print_info $? demo_quant_quant_aware_v1
startup_program = static.Program()
train_loader, avg_cost, acc_top1, acc_top5 = build_program( export CUDA_VISIBLE_DEVICES=${cudaid1}
train_program, startup_program, image_shape, train_dataset, archs, python train.py --model ResNet34 \
args, places) --pretrained_model ../../pretrain/ResNet34_pretrained \
--checkpoint_dir ./output/ResNet34 --num_epochs 1 >${log_path}/demo_quant_quant_aware_ResNet34_T 2>&1
current_flops = flops(train_program) print_info $? demo_quant_quant_aware_ResNet34_T
print('step: {}, current_flops: {}'.format(step, current_flops)) }
if current_flops > int(321208544): # 2.2 quant/quant_embedding
continue demo_quant_quant_embedding(){
cd ${slim_dir}/demo/quant/quant_embedding || catchException demo_quant_quant_embedding
test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program( export CUDA_VISIBLE_DEVICES=${cudaid1}
test_program, # 先使用word2vec的demo数据进行一轮训练,比较量化前infer结果同量化后infer结果different
startup_program, if [ -d "data" ];then
image_shape, rm -rf data
val_dataset, fi
archs, wget -q https://sys-p0.bj.bcebos.com/slim_ci/word_2evc_demo_data.tar.gz --no-check-certificate
args, tar xf word_2evc_demo_data.tar.gz
place, mv word_2evc_demo_data data
is_test=True) if [ -d "v1_cpu5_b100_lr1dir" ];then
test_program = test_program.clone(for_test=True) rm -rf v1_cpu5_b100_lr1dir
fi
exe = static.Executor(place) OPENBLAS_NUM_THREADS=1 CPU_NUM=5 python train.py --train_data_dir data/convert_text8 \
exe.run(startup_program) --dict_path data/test_build_dict --num_passes 1 --batch_size 100 --model_output_dir v1_cpu5_b100_lr1dir \
--base_lr 1.0 --print_batch 1000 --with_speed --is_sparse >${log_path}/quant_em_word2vec_T 2>&1
build_strategy = static.BuildStrategy() print_info $? quant_em_word2vec_T
train_compiled_program = static.CompiledProgram( # 量化前infer
train_program).with_data_parallel( python infer.py --infer_epoch --test_dir data/test_mid_dir \
loss_name=avg_cost.name, build_strategy=build_strategy) --dict_path data/test_build_dict_word_to_id_ \
for epoch_id in range(args.retain_epoch): --batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ \
for batch_id, data in enumerate(train_loader()): --start_index 0 --last_index 0 >${log_path}/quant_em_infer1 2>&1
fetches = [avg_cost.name] print_info $? quant_em_infer1
s_time = time.time() # 量化后infer
outs = exe.run(train_compiled_program, python infer.py --infer_epoch --test_dir data/test_mid_dir \
feed=data, --dict_path data/test_build_dict_word_to_id_ \
fetch_list=fetches)[0] --batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ --start_index 0 \
batch_time = time.time() - s_time --last_index 0 --emb_quant True >${log_path}/quant_em_infer2 2>&1
if batch_id % 10 == 0: print_info $? quant_em_infer2
_logger.info( }
'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'. # 2.3 quan_post # 小数据集
format(step, epoch_id, batch_id, outs[0], batch_time)) demo_quant_quant_post(){
# 20210425 新增4种离线量化方法
reward = [] cd ${slim_dir}/demo/quant/quant_post || catchException demo_quant_quant_post
for batch_id, data in enumerate(test_loader()): export CUDA_VISIBLE_DEVICES=${cudaid1}
test_fetches = [ # 1 导出模型
test_avg_cost.name, test_acc_top1.name, test_acc_top5.name python export_model.py --model "MobileNet" --pretrained_model ../../pretrain/MobileNetV1_pretrained \
] --data imagenet >${log_path}/st_quant_post_v1_export 2>&1
batch_reward = exe.run(test_program, print_info $? st_quant_post_v1_export
feed=data, # 量化前eval
fetch_list=test_fetches) python eval.py --model_path ./inference_model/MobileNet --model_name model \
reward_avg = np.mean(np.array(batch_reward), axis=1) --params_name weights >${log_path}/st_quant_post_v1_eval1 2>&1
reward.append(reward_avg) print_info $? st_quant_post_v1_eval1
_logger.info( # 3 离线量化
'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'. # 4 量化后eval
format(step, batch_id, batch_reward[0], batch_reward[1], for algo in hist avg mse
batch_reward[2])) do
## 不带bc 离线量化
finally_reward = np.mean(np.array(reward), axis=0) echo "quant_post train no bc " ${algo}
_logger.info( python quant_post.py --model_path ./inference_model/MobileNet \
'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format( --save_path ./quant_model/${algo}/MobileNet \
finally_reward[0], finally_reward[1], finally_reward[2])) --model_filename model --params_filename weights --algo ${algo} >${log_path}/st_quant_post_v1_T_${algo} 2>&1
print_info $? st_quant_post_v1_T_${algo}
sa_nas.reward(float(finally_reward[1])) # 量化后eval
echo "quant_post eval no bc " ${algo}
python eval.py --model_path ./quant_model/${algo}/MobileNet --model_name __model__ \
def test_search_result(tokens, image_size, args, config): --params_name __params__ > ${log_path}/st_quant_post_${algo}_eval2 2>&1
places = static.cuda_places() if args.use_gpu else static.cpu_places() print_info $? st_quant_post_${algo}_eval2
place = places[0]
# 带bc参数的 离线量化
sa_nas = SANAS( echo "quant_post train bc " ${algo}
config, python quant_post.py --model_path ./inference_model/MobileNet \
server_addr=(args.server_address, args.port), --save_path ./quant_model/${algo}_bc/MobileNet \
search_steps=args.search_steps, --model_filename model --params_filename weights \
is_server=True) --algo ${algo} --bias_correction True >${log_path}/st_quant_post_T_${algo}_bc 2>&1
print_info $? st_quant_post_T_${algo}_bc
image_shape = [3, image_size, image_size]
if args.data == 'cifar10': # 量化后eval
transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])]) echo "quant_post eval bc " ${algo}
train_dataset = paddle.vision.datasets.Cifar10( python eval.py --model_path ./quant_model/${algo}_bc/MobileNet --model_name __model__ \
mode='train', transform=transform, backend='cv2') --params_name __params__ > ${log_path}/st_quant_post_${algo}_bc_eval2 2>&1
val_dataset = paddle.vision.datasets.Cifar10( print_info $? st_quant_post_${algo}_bc_eval2
mode='test', transform=transform, backend='cv2')
done
elif args.data == 'imagenet': }
train_dataset = imagenet_reader.ImageNetDataset(mode='train')
val_dataset = imagenet_reader.ImageNetDataset(mode='val') # 2.3 quant_post_hpo # 小数据集
demo_quant_quant_post_hpo(){
archs = sa_nas.tokens2arch(tokens)[0]
cd ${slim_dir}/demo/quant/quant_post_hpo || catchException demo_quant_quant_post_hpo
train_program = static.Program() export CUDA_VISIBLE_DEVICES=${cudaid1}
test_program = static.Program() # 1.导出模型
startup_program = static.Program() python ../quant_post/export_model.py \
train_loader, avg_cost, acc_top1, acc_top5 = build_program( --model "MobileNet" \
train_program, startup_program, image_shape, train_dataset, archs, args, --pretrained_model ../../pretrain/MobileNetV1_pretrained \
places) --data imagenet > ${log_path}/st_quant_post__hpo_v1_export 2>&1
print_info $? st_quant_post__hpo_v1_export
current_flops = flops(train_program) # 2. quant_post_hpo 设置max_model_quant_count=2
print('current_flops: {}'.format(current_flops)) python quant_post_hpo.py \
test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program( --use_gpu=True \
test_program, --model_path="./inference_model/MobileNet/" \
startup_program, --save_path="./inference_model/MobileNet_quant/" \
image_shape, --model_filename="model" \
val_dataset, --params_filename="weights" \
archs, --max_model_quant_count=2 > ${log_path}/st_quant_post_hpo 2>&1
args, print_info $? st_quant_post_hpo
place, # 3. 量化后eval
is_test=True) python ../quant_post/eval.py \
--model_path ./inference_model/MobileNet_quant \
test_program = test_program.clone(for_test=True) --model_name __model__ \
--params_name __params__ > ${log_path}/st_quant_post_hpo_eval 2>&1
exe = static.Executor(place) print_info $? st_quant_post_hpo_eval
exe.run(startup_program)
}
build_strategy = static.BuildStrategy()
train_compiled_program = static.CompiledProgram( #2.4
train_program).with_data_parallel( demo_quant_pact_quant_aware(){
loss_name=avg_cost.name, build_strategy=build_strategy) cd ${slim_dir}/demo/quant/pact_quant_aware || catchException demo_quant_pact_quant_aware
for epoch_id in range(args.retain_epoch): export CUDA_VISIBLE_DEVICES=${cudaid1}
for batch_id, data in enumerate(train_loader()): # 普通量化,使用小数据集即可
fetches = [avg_cost.name] # 2.1版本时默认BS=128 会报显存不足,故暂时修改成64
s_time = time.time() python train.py --model MobileNetV3_large_x1_0 \
outs = exe.run(train_compiled_program, --pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
feed=data, --num_epochs 1 --lr 0.0001 --use_pact False --batch_size 128 >${log_path}/demo_quant_pact_quant_aware_v3_nopact 2>&1
fetch_list=fetches)[0] print_info $? demo_quant_pact_quant_aware_v3_nopact
batch_time = time.time() - s_time python train.py --model MobileNetV3_large_x1_0 \
if batch_id % 10 == 0: --pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
_logger.info( --num_epochs 1 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \
'TRAIN: epoch: {}, batch: {}, cost: {}, batch_time: {}ms'. --step_epochs 2 --l2_decay 1e-5 >${log_path}/demo_quant_pact_quant_aware_v3 2>&1
format(epoch_id, batch_id, outs[0], batch_time)) print_info $? demo_quant_pact_quant_aware_v3
# load
reward = [] python train.py --model MobileNetV3_large_x1_0 \
for batch_id, data in enumerate(test_loader()): --pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
test_fetches = [ --num_epochs 2 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \
test_avg_cost.name, test_acc_top1.name, test_acc_top5.name --step_epochs 20 --l2_decay 1e-5 \
] --checkpoint_dir ./output/MobileNetV3_large_x1_0/0 \
batch_reward = exe.run(test_program, --checkpoint_epoch 0 >${log_path}/demo_quant_pact_quant_aware_v3_load 2>&1
feed=data, print_info $? demo_quant_pact_quant_aware_v3_load
fetch_list=test_fetches) }
reward_avg = np.mean(np.array(batch_reward), axis=1)
reward.append(reward_avg) # 2.5
demo_dygraph_quant(){
_logger.info( cd ${slim_dir}/demo/dygraph/quant || catchException demo_dygraph_quant
'TEST: batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'. CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --model='mobilenet_v1' \
format(batch_id, batch_reward[0], batch_reward[1], batch_reward[ --pretrained_model '../../pretrain/MobileNetV1_pretrained' \
2])) --num_epochs 1 \
--batch_size 128 \
finally_reward = np.mean(np.array(reward), axis=0) > ${log_path}/dy_quant_v1_gpu1 2>&1
_logger.info( print_info $? dy_quant_v1_gpu1
'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format( # dy_pact_v3
finally_reward[0], finally_reward[1], finally_reward[2])) CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --lr=0.001 \
--batch_size 128 \
--use_pact=True --num_epochs=1 --l2_decay=2e-5 --ls_epsilon=0.1 \
if __name__ == '__main__': --pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
--num_epochs 1 > ${log_path}/dy_pact_quant_v3_gpu1 2>&1
parser = argparse.ArgumentParser( print_info $? dy_pact_quant_v3_gpu1
description='SA NAS MobileNetV2 cifar10 argparase') # 多卡训练,以0到3号卡为例
parser.add_argument( CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
'--use_gpu', train.py --lr=0.001 \
type=ast.literal_eval, --pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
default=True, --use_pact=True --num_epochs=1 \
help='Whether to use GPU in train/test model.') --l2_decay=2e-5 \
parser.add_argument( --ls_epsilon=0.1 \
'--batch_size', type=int, default=256, help='batch size.') --batch_size=128 \
parser.add_argument( --model_save_dir output > ${log_path}/dy_pact_quant_v3_gpu4 2>&1
'--class_dim', type=int, default=10, help='classify number.') print_info $? dy_pact_quant_v3_gpu4
parser.add_argument( }
'--data', # 2.6
type=str, ce_tests_dygraph_qat(){
default='cifar10', cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat
choices=['cifar10', 'imagenet'], ln -s ${slim_dir}/demo/data/ILSVRC2012
help='server address.') test_samples=1000 # if set as -1, use all test samples
parser.add_argument( data_path='./ILSVRC2012/'
'--is_server', batch_size=16
type=ast.literal_eval, epoch=1
default=True, lr=0.0001
help='Whether to start a server.') num_workers=1
parser.add_argument( output_dir=$PWD/output_models
'--search_steps', for model in mobilenet_v1
type=int, do
default=100, # if [ $1 == nopact ];then
help='controller server number.') # 1 quant train
parser.add_argument( echo "------1 nopact train--------", ${model}
'--server_address', type=str, default="", help='server ip.') export CUDA_VISIBLE_DEVICES=${cudaid1}
parser.add_argument('--port', type=int, default=8881, help='server port') python ./src/qat.py \
parser.add_argument( --arch=${model} \
'--retain_epoch', type=int, default=5, help='epoch for each token.') --data=${data_path} \
parser.add_argument('--lr', type=float, default=0.1, help='learning rate.') --epoch=${epoch} \
args = parser.parse_args() --batch_size=32 \
print(args) --num_workers=${num_workers} \
--lr=${lr} \
if args.data == 'cifar10': --output_dir=${output_dir} \
image_size = 32 --enable_quant > qat_${model}_gpu1_nw1 2>&1
block_num = 3 # 2 eval before save quant
elif args.data == 'imagenet': echo "--------2 eval before save quant -------------", ${model}
image_size = 224 python ./src/eval.py \
block_num = 6 --model_path=./output_models/quant_dygraph/${model} \
else: --data_dir=${data_path} \
raise NotImplementedError( --test_samples=${test_samples} \
'data must in [cifar10, imagenet], but received: {}'.format( --batch_size=${batch_size} > eval_before_save_${model} 2>&1
args.data)) # 3 CPU上部署量化模型,需要使用`test/save_quant_model.py`脚本进行模型转换。
echo "--------3 save_nopact_quant_model-------------", ${model}
config = [('MobileNetV2Space')] python src/save_quant_model.py \
paddle.enable_static() --load_model_path output_models/quant_dygraph/${model} \
search_mobilenetv2(config, args, image_size, is_server=args.is_server) --save_model_path int8_models/${model} > save_quant_${model} 2>&1
# 4
echo "--------4 CPU eval after save nopact quant -------------", ${model}
export CUDA_VISIBLE_DEVICES=
python ./src/eval.py \
--model_path=./int8_models/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1
# elif [ $1 == pact ];then
# 1 pact quant train
echo "------1 pact train--------", ${model}
export CUDA_VISIBLE_DEVICES=${cudaid1}
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--epoch=${epoch} \
--batch_size=32 \
--num_workers=${num_workers} \
--lr=${lr} \
--output_dir=$PWD/output_models_pact/ \
--enable_quant \
--use_pact > pact_qat_${model}_gpu1_nw1 2>&1
# 2 eval before save quant
echo "--------2 eval before save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./output_models_pact/quant_dygraph/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1
echo "--------3 save pact quant -------------", ${model}
python src/save_quant_model.py \
--load_model_path output_models_pact/quant_dygraph/${model} \
--save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1
echo "--------4 CPU eval after save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./int8_models_pact/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1
# fi
done
}
# Dygraph quantization-aware-training (QAT) CE case.
#
# For each model in the loop it runs two pipelines, plain QAT and PACT QAT,
# each made of four steps:
#   1. train with ./src/qat.py
#   2. evaluate the trained model before conversion (./src/eval.py)
#   3. convert it to an int8 model with src/save_quant_model.py
#   4. evaluate the converted int8 model (./src/eval.py)
#
# Reads the globals slim_dir, cudaid1 and log_path. Every step writes its
# output to ${log_path}/<case> and is reported through print_info, the same
# way ce_tests_dygraph_ptq reports its steps, so a failing step shows up as
# a FAIL_<case>.log instead of being silently left in the working directory.
ce_tests_dygraph_qat(){
    cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat
    # -f replaces a link left behind by an earlier run instead of failing
    # with "File exists".
    ln -sf ${slim_dir}/demo/data/ILSVRC2012
    test_samples=1000 # if set as -1, use all test samples
    data_path='./ILSVRC2012/'
    batch_size=16
    epoch=1
    lr=0.0001
    num_workers=1
    output_dir=$PWD/output_models
    #for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
    for model in mobilenet_v1
    do
        # ---------------- plain QAT ----------------
        # 1 quant train
        echo "------1 nopact train--------", ${model}
        export CUDA_VISIBLE_DEVICES=${cudaid1}
        python ./src/qat.py \
            --arch=${model} \
            --data=${data_path} \
            --epoch=${epoch} \
            --batch_size=32 \
            --num_workers=${num_workers} \
            --lr=${lr} \
            --output_dir=${output_dir} \
            --enable_quant > ${log_path}/qat_${model}_gpu1_nw1 2>&1
        print_info $? qat_${model}_gpu1_nw1
        # 2 eval before save quant
        echo "--------2 eval before save quant -------------", ${model}
        python ./src/eval.py \
            --model_path=./output_models/quant_dygraph/${model} \
            --data_dir=${data_path} \
            --test_samples=${test_samples} \
            --batch_size=${batch_size} > ${log_path}/eval_before_save_${model} 2>&1
        print_info $? eval_before_save_${model}
        # 3 deploying the quantized model on CPU requires converting it
        #   with the save_quant_model.py script first
        echo "--------3 save_nopact_quant_model-------------", ${model}
        python src/save_quant_model.py \
            --load_model_path output_models/quant_dygraph/${model} \
            --save_model_path int8_models/${model} > ${log_path}/save_quant_${model} 2>&1
        print_info $? save_quant_${model}
        # 4 evaluate the converted model with every GPU hidden
        echo "--------4 CPU eval after save nopact quant -------------", ${model}
        export CUDA_VISIBLE_DEVICES=
        python ./src/eval.py \
            --model_path=./int8_models/${model} \
            --data_dir=${data_path} \
            --test_samples=${test_samples} \
            --batch_size=${batch_size} > ${log_path}/cpu_eval_after_save_${model} 2>&1
        print_info $? cpu_eval_after_save_${model}

        # ---------------- PACT QAT ----------------
        # 1 pact quant train
        echo "------1 pact train--------", ${model}
        export CUDA_VISIBLE_DEVICES=${cudaid1}
        python ./src/qat.py \
            --arch=${model} \
            --data=${data_path} \
            --epoch=${epoch} \
            --batch_size=32 \
            --num_workers=${num_workers} \
            --lr=${lr} \
            --output_dir=$PWD/output_models_pact/ \
            --enable_quant \
            --use_pact > ${log_path}/pact_qat_${model}_gpu1_nw1 2>&1
        print_info $? pact_qat_${model}_gpu1_nw1
        # 2 eval before save quant
        echo "--------2 eval before save pact quant -------------", ${model}
        python ./src/eval.py \
            --model_path=./output_models_pact/quant_dygraph/${model} \
            --data_dir=${data_path} \
            --test_samples=${test_samples} \
            --batch_size=${batch_size} > ${log_path}/eval_before_pact_save_${model} 2>&1
        print_info $? eval_before_pact_save_${model}
        # 3 convert the PACT model to int8
        echo "--------3 save pact quant -------------", ${model}
        python src/save_quant_model.py \
            --load_model_path output_models_pact/quant_dygraph/${model} \
            --save_model_path int8_models_pact/${model} > ${log_path}/save_pact_quant_${model} 2>&1
        print_info $? save_pact_quant_${model}
        # 4 evaluate the converted PACT model
        # NOTE(review): unlike the plain-QAT step 4, CUDA_VISIBLE_DEVICES is
        # still ${cudaid1} here although the banner says "CPU eval" - confirm
        # whether the GPUs should be hidden for this step as well.
        echo "--------4 CPU eval after save pact quant -------------", ${model}
        python ./src/eval.py \
            --model_path=./int8_models_pact/${model} \
            --data_dir=${data_path} \
            --test_samples=${test_samples} \
            --batch_size=${batch_size} > ${log_path}/cpu_eval_after_pact_save_${model} 2>&1
        print_info $? cpu_eval_after_pact_save_${model}
    done
}
# Dygraph post-training-quantization (PTQ) CE case.
#
# For each model in the loop it:
#   1. quantizes the model with ./src/ptq.py, writing into ${output_dir}
#   2. evaluates ${output_dir}/<model>/fp32_infer with --use_gpu=True
#   3. evaluates ${output_dir}/<model>/int8_infer with --use_gpu=False
#
# Reads the globals slim_dir, cudaid1 and log_path; each step logs to
# ${log_path}/<case> and is reported through print_info.
ce_tests_dygraph_ptq(){
    # Report a failed cd under this function's own name.
    cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_ptq
    # -f replaces a link left behind by an earlier run instead of failing
    # with "File exists".
    ln -sf ${slim_dir}/demo/data/ILSVRC2012
    test_samples=1000 # if set as -1, use all test samples
    data_path='./ILSVRC2012/'
    batch_size=32
    epoch=1
    output_dir="./output_ptq"
    quant_batch_num=10
    quant_batch_size=10
    #for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
    for model in mobilenet_v1
    do
        echo "--------quantize model: ${model}-------------"
        export CUDA_VISIBLE_DEVICES=${cudaid1}
        # save ptq quant model
        python ./src/ptq.py \
            --data=${data_path} \
            --arch=${model} \
            --quant_batch_num=${quant_batch_num} \
            --quant_batch_size=${quant_batch_size} \
            --output_dir=${output_dir} > ${log_path}/ptq_${model} 2>&1
        print_info $? ptq_${model}

        echo "-------- eval fp32_infer model -------------", ${model}
        python ./src/test.py \
            --model_path=${output_dir}/${model}/fp32_infer \
            --data_dir=${data_path} \
            --batch_size=${batch_size} \
            --use_gpu=True \
            --test_samples=${test_samples} \
            --ir_optim=False > ${log_path}/ptq_eval_fp32_${model} 2>&1
        print_info $? ptq_eval_fp32_${model}

        echo "-------- eval int8_infer model -------------", ${model}
        python ./src/test.py \
            --model_path=${output_dir}/${model}/int8_infer \
            --data_dir=${data_path} \
            --batch_size=${batch_size} \
            --use_gpu=False \
            --test_samples=${test_samples} \
            --ir_optim=False > ${log_path}/ptq_eval_int8_${model} 2>&1
        print_info $? ptq_eval_int8_${model}
    done
}
# The release branch has no ce_tests_dygraph_ptq case; set is_develop="False" on the release branch to skip it.
is_develop="True"
# Run every quantization case in order (original note: 10 models in total).
#
# ce_tests_dygraph_ptq runs first, and only when is_develop is "True".
# The remaining cases, with the original per-case notes:
#   demo_quant_quant_aware       - 2 models
#   demo_quant_quant_embedding   - 1 model
#   demo_quant_quant_post        - 4 strategies
#   demo_dygraph_quant           - 2 models
#   demo_quant_pact_quant_aware  - 1 model
#   ce_tests_dygraph_qat         - 4 models
#   demo_quant_quant_post_hpo
# Disabled variants kept for reference: ce_tests_dygraph_ptq4,
# ce_tests_dygraph_qat4.
all_quant(){
    if [[ "${is_develop}" == "True" ]]; then
        ce_tests_dygraph_ptq
    fi

    local quant_case
    for quant_case in \
        demo_quant_quant_aware \
        demo_quant_quant_embedding \
        demo_quant_quant_post \
        demo_dygraph_quant \
        demo_quant_pact_quant_aware \
        ce_tests_dygraph_qat \
        demo_quant_quant_post_hpo
    do
        ${quant_case}
    done
}
# 3 prune
# Static-graph pruning cases run from ${slim_dir}/demo/prune.
#
# Each enabled case runs train.py or eval.py on GPU ${cudaid1}, redirects
# its output to ${log_path}/<case> and reports the exit status through
# print_info. Cases whose command is commented out are currently disabled.
demo_prune(){
    cd ${slim_dir}/demo/prune || catchException demo_prune
    # 3.1 P0 prune
    if [ -d "models" ];then
        rm -rf models
    fi
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    python train.py --model "MobileNet" --pruned_ratio 0.31 --data "imagenet" \
        --pretrained_model ../pretrain/MobileNetV1_pretrained/ --num_epochs 1 >${log_path}/prune_v1_T 2>&1
    print_info $? prune_v1_T

    # 3.2 prune_fpgm
    # slim_prune_fpgm_v1_T (disabled)
    # export CUDA_VISIBLE_DEVICES=${cudaid1}
    # python train.py \
    # --model="MobileNet" \
    # --pretrained_model="../pretrain/MobileNetV1_pretrained" \
    # --data="imagenet" \
    # --pruned_ratio=0.3125 \
    # --lr=0.1 \
    # --num_epochs=1 \
    # --test_period=1 \
    # --step_epochs 30 60 90\
    # --l2_decay=3e-5 \
    # --lr_strategy="piecewise_decay" \
    # --criterion="geometry_median" \
    # --model_path="./fpgm_mobilenetv1_models" \
    # --save_inference True >${log_path}/slim_prune_fpgm_v1_T 2>&1
    # print_info $? slim_prune_fpgm_v1_T

    # slim_prune_fpgm_v2_T
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    # v2 -50%
    python train.py \
        --model="MobileNetV2" \
        --pretrained_model="../pretrain/MobileNetV2_pretrained" \
        --data="imagenet" \
        --pruned_ratio=0.325 \
        --lr=0.001 \
        --num_epochs=2 \
        --test_period=1 \
        --step_epochs 30 60 80 \
        --l2_decay=1e-4 \
        --lr_strategy="piecewise_decay" \
        --criterion="geometry_median" \
        --model_path="./output/fpgm_mobilenetv2_models" \
        --save_inference True >${log_path}/slim_prune_fpgm_v2_T 2>&1
    print_info $? slim_prune_fpgm_v2_T
    python eval.py --model "MobileNetV2" --data "imagenet" \
        --model_path "./output/fpgm_mobilenetv2_models/0" >${log_path}/slim_prune_fpgm_v2_eval 2>&1
    print_info $? slim_prune_fpgm_v2_eval

    # ResNet34 -50 (training disabled)
    # export CUDA_VISIBLE_DEVICES=${cudaid1}
    # python train.py \
    # --model="ResNet34" \
    # --pretrained_model="../pretrain/ResNet34_pretrained" \
    # --data="imagenet" \
    # --pruned_ratio=0.3125 \
    # --lr=0.001 \
    # --num_epochs=2 \
    # --test_period=1 \
    # --step_epochs 30 60 \
    # --l2_decay=1e-4 \
    # --lr_strategy="piecewise_decay" \
    # --criterion="geometry_median" \
    # --model_path="./output/fpgm_resnet34_50_models" \
    # --save_inference True >${log_path}/slim_prune_fpgm_resnet34_50_T 2>&1
    # print_info $? slim_prune_fpgm_resnet34_50_T
    # The print_info for slim_prune_fpgm_resnet34_50_T is disabled together
    # with its training command: left active, it would pick up the exit
    # status of the previous print_info and report on a log that was never
    # written.
    # NOTE(review): this eval reads ./output/fpgm_resnet34_50_models/0, which
    # the disabled training above would have produced - confirm whether it
    # should be disabled as well.
    python eval.py --model "ResNet34" --data "imagenet" \
        --model_path "./output/fpgm_resnet34_50_models/0" >${log_path}/slim_prune_fpgm_resnet34_50_eval 2>&1
    print_info $? slim_prune_fpgm_resnet34_50_eval

    # ResNet34 -42 slim_prune_fpgm_resnet34_42_T
    cd ${slim_dir}/demo/prune
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    python train.py \
        --model="ResNet34" \
        --pretrained_model="../pretrain/ResNet34_pretrained" \
        --data="imagenet" \
        --pruned_ratio=0.25 \
        --num_epochs=2 \
        --test_period=1 \
        --lr_strategy="cosine_decay" \
        --criterion="geometry_median" \
        --model_path="./output/fpgm_resnet34_025_120_models" \
        --save_inference True >${log_path}/slim_prune_fpgm_resnet34_42_T 2>&1
    print_info $? slim_prune_fpgm_resnet34_42_T
    python eval.py --model "ResNet34" --data "imagenet" \
        --model_path "./output/fpgm_resnet34_025_120_models/0" >${log_path}/slim_prune_fpgm_resnet34_42_eval 2>&1
    print_info $? slim_prune_fpgm_resnet34_42_eval

    # 3.3 prune ResNet50
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    # With release 2.1 the default batch size of 256 runs out of GPU memory,
    # so it is temporarily lowered to 128.
    python train.py --model ResNet50 --pruned_ratio 0.31 --data "imagenet" \
        --save_inference True --pretrained_model ../pretrain/ResNet50_pretrained \
        --num_epochs 1 --batch_size 128 >${log_path}/prune_ResNet50_T 2>&1
    print_info $? prune_ResNet50_T
}
# 3.4 dygraph_prune
#dy_prune_ResNet34_f42
# Dygraph pruning cases run from ${slim_dir}/demo/dygraph/pruning.
#
# Single-GPU cases use ${cudaid1}; the paddle.distributed.launch cases use
# ${cudaid2}. Each case redirects its output to ${log_path}/<case> and
# reports the exit status through print_info.
demo_dygraph_pruning(){
    cd ${slim_dir}/demo/dygraph/pruning || catchException demo_dygraph_pruning
    # -f/-n: replace a "data" link left by an earlier run. Without -n a
    # second run follows the existing link and creates a nested link inside
    # the dataset directory instead.
    ln -sfn ${slim_dir}/demo/data data

    # Train: ResNet34, FPGM criterion, pruned_ratio 0.25, single GPU.
    CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
        --use_gpu=True \
        --model="resnet34" \
        --data="imagenet" \
        --pruned_ratio=0.25 \
        --num_epochs=1 \
        --batch_size=128 \
        --lr_strategy="cosine_decay" \
        --criterion="fpgm" \
        --model_path="./fpgm_resnet34_025_120_models" >${log_path}/dy_prune_ResNet34_f42_gpu1 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu1

    # 2.3 Resume training from a checkpoint via the --checkpoint option.
    CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
        --use_gpu=True \
        --model="resnet34" \
        --data="imagenet" \
        --pruned_ratio=0.25 \
        --num_epochs=2 \
        --batch_size=128 \
        --lr_strategy="cosine_decay" \
        --criterion="fpgm" \
        --model_path="./fpgm_resnet34_025_120_models" \
        --checkpoint="./fpgm_resnet34_025_120_models/0" >${log_path}/dy_prune_ResNet34_f42_gpu1_load 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu1_load

    # 2.4 Evaluate the pruned and retrained model with eval.py.
    CUDA_VISIBLE_DEVICES=${cudaid1} python eval.py \
        --checkpoint=./fpgm_resnet34_025_120_models/1 \
        --model="resnet34" \
        --pruned_ratio=0.25 \
        --batch_size=128 >${log_path}/dy_prune_ResNet34_f42_gpu1_eval 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu1_eval

    # 2.5 Export the model for inference.
    CUDA_VISIBLE_DEVICES=${cudaid1} python export_model.py \
        --checkpoint=./fpgm_resnet34_025_120_models/final \
        --model="resnet34" \
        --pruned_ratio=0.25 \
        --output_path=./infer_final/resnet > ${log_path}/dy_prune_ResNet34_f42_gpu1_export 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu1_export

    # add dy_prune_fpgm_mobilenetv1_50_T
    # A space precedes every continuation backslash so that no argument can
    # fuse with the one on the following line.
    CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
        --log_dir="fpgm_mobilenetv1_train_log" \
        train.py \
        --model="mobilenet_v1" \
        --data="imagenet" \
        --pruned_ratio=0.3125 \
        --lr=0.1 \
        --num_epochs=1 \
        --test_period=1 \
        --step_epochs 30 60 90 \
        --l2_decay=3e-5 \
        --lr_strategy="piecewise_decay" \
        --criterion="fpgm" \
        --model_path="./fpgm_mobilenetv1_models" > ${log_path}/dy_prune_fpgm_mobilenetv1_50_T 2>&1
    print_info $? dy_prune_fpgm_mobilenetv1_50_T

    # add dy_prune_fpgm_mobilenetv2_50_T (disabled)
    # CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
    # --log_dir="fpgm_mobilenetv2_train_log" \
    # train.py \
    # --model="mobilenet_v2" \
    # --data="imagenet" \
    # --pruned_ratio=0.325 \
    # --lr=0.001 \
    # --num_epochs=1 \
    # --test_period=1 \
    # --step_epochs 30 60 80 \
    # --l2_decay=1e-4 \
    # --lr_strategy="piecewise_decay" \
    # --criterion="fpgm" \
    # --model_path="./fpgm_mobilenetv2_models" > ${log_path}/dy_prune_fpgm_mobilenetv2_50_T 2>&1
    # print_info $? dy_prune_fpgm_mobilenetv2_50_T

    # ResNet34 FPGM pruning launched through paddle.distributed.launch.
    CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
        --log_dir="fpgm_resnet34_f_42_train_log" \
        train.py \
        --use_gpu=True \
        --model="resnet34" \
        --data="imagenet" \
        --pruned_ratio=0.25 \
        --batch_size=128 \
        --num_epochs=1 \
        --test_period=1 \
        --lr_strategy="cosine_decay" \
        --criterion="fpgm" \
        --model_path="./fpgm_resnet34_025_120_models" > ${log_path}/dy_prune_ResNet34_f42_gpu2 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu2
}
# 3.5 st unstructured_prune
demo_unstructured_prune(){
    # Static-graph unstructured pruning demo, run from demo/unstructured_prune:
    #   1. threshold-mode pruning training, then evaluation of that model;
    #   2. ratio-mode pruning training;
    #   3. evaluation of the MNIST model directory;
    #   4. GMP-strategy ratio pruning through paddle.distributed.launch.
    # Every step logs to ${log_path}/<name> and hands its exit status to
    # print_info.
    cd ${slim_dir}/demo/unstructured_prune || catchException demo_unstructured_prune

    # Flags shared by the two plain train.py runs, kept as a head group and a
    # tail group so the mode-specific flags stay between them.
    # Note: batch_size is the total across all cards, i.e. 256 on one card.
    local -a train_head=(--batch_size 256 --pretrained_model ../pretrain/MobileNetV1_pretrained --lr 0.05)
    local -a train_tail=(--data imagenet --lr_strategy piecewise_decay --step_epochs 1 2 3 --num_epochs 1 --test_period 1 --model_period 1)

    ## sparsity: -30%, accuracy: 70%/89%
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    python train.py \
        "${train_head[@]}" \
        --pruning_mode threshold \
        --threshold 0.01 \
        "${train_tail[@]}" \
        --model_path st_unstructured_models >${log_path}/st_unstructured_prune_threshold_T 2>&1
    print_info $? st_unstructured_prune_threshold_T

    # Evaluate the threshold-pruned model.
    python evaluate.py \
        --pruned_model=st_unstructured_models \
        --data="imagenet" >${log_path}/st_unstructured_prune_threshold_eval 2>&1
    print_info $? st_unstructured_prune_threshold_eval

    ## sparsity: -55%, accuracy: 67%+/87%+
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    python train.py \
        "${train_head[@]}" \
        --pruning_mode ratio \
        --ratio 0.55 \
        "${train_tail[@]}" \
        --model_path st_ratio_models >${log_path}/st_unstructured_prune_ratio_T 2>&1
    print_info $? st_unstructured_prune_ratio_T

    # MNIST dataset (training currently disabled)
    # python train.py \
    # --batch_size 256 \
    # --pretrained_model ../pretrain/MobileNetV1_pretrained \
    # --lr 0.05 \
    # --pruning_mode threshold \
    # --threshold 0.01 \
    # --data mnist \
    # --lr_strategy piecewise_decay \
    # --step_epochs 1 2 3 \
    # --num_epochs 1 \
    # --test_period 1 \
    # --model_period 1 \
    # --model_path st_unstructured_models_mnist >${log_path}/st_unstructured_prune_threshold_mnist_T 2>&1
    # print_info $? st_unstructured_prune_threshold_mnist_T

    # Evaluate the MNIST model.
    # NOTE(review): the training step that would write
    # st_unstructured_models_mnist is commented out above, so this evaluation
    # presumably depends on a model produced elsewhere - confirm.
    python evaluate.py \
        --pruned_model=st_unstructured_models_mnist \
        --data="mnist" >${log_path}/st_unstructured_prune_threshold_mnist_eval 2>&1
    print_info $? st_unstructured_prune_threshold_mnist_eval

    # GMP pruning strategy, launched through paddle.distributed.launch on the
    # devices listed in cudaid2.
    export CUDA_VISIBLE_DEVICES=${cudaid2}
    local -a gmp_opts=(
        --batch_size 64
        --data imagenet
        --pruning_mode ratio
        --ratio 0.75
        --lr 0.005
        --model MobileNet
        --num_epochs 1
        --test_period 5
        --model_period 10
        --pretrained_model ../pretrain/MobileNetV1_pretrained
        --model_path "./models"
        --step_epochs 71 88
        --initial_ratio 0.15
        --pruning_steps 5
        --stable_epochs 0
        --pruning_epochs 54
        --tunning_epochs 54
        --last_epoch -1
        --prune_params_type conv1x1_only
        --pruning_strategy gmp
    )
    python -m paddle.distributed.launch \
        --log_dir="st_unstructured_prune_gmp_log" \
        train.py "${gmp_opts[@]}" > ${log_path}/st_unstructured_prune_ratio_gmp 2>&1
    print_info $? st_unstructured_prune_ratio_gmp
}
demo_dygraph_unstructured_pruning(){
    # Dynamic-graph unstructured pruning demo, run from
    # demo/dygraph/unstructured_pruning on the devices listed in cudaid2:
    #   1. ratio-mode pruning training;
    #   2. threshold-mode pruning training, evaluation, and a resumed run that
    #      loads the saved parameters;
    #   3. GMP-strategy ratio pruning.
    # Every step logs to ${log_path}/<name> and hands its exit status to
    # print_info, which renames that log file - so each command MUST redirect
    # its output to exactly the name passed to print_info.
    cd ${slim_dir}/demo/dygraph/unstructured_pruning || catchException demo_dygraph_unstructured_pruning
    export CUDA_VISIBLE_DEVICES=${cudaid2}

    ## sparsity: -55%, accuracy: 67%+/87%+
    python -m paddle.distributed.launch \
        --log_dir train_dy_ratio_log train.py \
        --data imagenet \
        --lr 0.05 \
        --pruning_mode ratio \
        --ratio 0.55 \
        --batch_size 256 \
        --lr_strategy piecewise_decay \
        --step_epochs 1 2 3 \
        --num_epochs 1 \
        --test_period 1 \
        --model_period 1 \
        --model_path dy_ratio_models >${log_path}/dy_prune_ratio_T 2>&1
    print_info $? dy_prune_ratio_T

    ## sparsity: -30%, accuracy: 70%/89%
    export CUDA_VISIBLE_DEVICES=${cudaid2}
    python -m paddle.distributed.launch \
        --log_dir train_dy_threshold_log train.py \
        --data imagenet \
        --lr 0.05 \
        --pruning_mode threshold \
        --threshold 0.01 \
        --batch_size 256 \
        --lr_strategy piecewise_decay \
        --step_epochs 1 2 3 \
        --num_epochs 1 \
        --test_period 1 \
        --model_period 1 \
        --model_path dy_threshold_models >${log_path}/dy_threshold_prune_T 2>&1
    print_info $? dy_threshold_prune_T

    # Evaluate the threshold-pruned model.
    python evaluate.py --pruned_model dy_threshold_models/model.pdparams \
        --data imagenet >${log_path}/dy_threshold_prune_eval 2>&1
    print_info $? dy_threshold_prune_eval

    # Resume training from the parameters saved by the threshold run.
    python -m paddle.distributed.launch \
        --log_dir train_dy_threshold_load_log train.py \
        --data imagenet \
        --lr 0.05 \
        --pruning_mode threshold \
        --threshold 0.01 \
        --batch_size 256 \
        --lr_strategy piecewise_decay \
        --step_epochs 1 2 3 \
        --num_epochs 3 \
        --test_period 1 \
        --model_period 1 \
        --model_path dy_threshold_models_new \
        --pretrained_model dy_threshold_models/model.pdparams \
        --last_epoch 1 > ${log_path}/dy_threshold_prune_T_load 2>&1
    print_info $? dy_threshold_prune_T_load

    # cifar10 (currently disabled)
    # python train.py --data cifar10 --lr 0.05 \
    # --pruning_mode threshold \
    # --threshold 0.01 \
    # --model_period 1 \
    # --num_epochs 2 >${log_path}/dy_threshold_prune_cifar10_T 2>&1
    # print_info $? dy_threshold_prune_cifar10_T

    # GMP pruning strategy.
    export CUDA_VISIBLE_DEVICES=${cudaid2}
    python -m paddle.distributed.launch \
        --log_dir="dy_unstructured_prune_gmp_log" \
        train.py \
        --batch_size 64 \
        --data imagenet \
        --pruning_mode ratio \
        --ratio 0.75 \
        --lr 0.005 \
        --num_epochs 1 \
        --test_period 5 \
        --model_period 10 \
        --model_path "./models" \
        --step_epochs 71 88 \
        --initial_ratio 0.15 \
        --pruning_steps 100 \
        --stable_epochs 0 \
        --pruning_epochs 54 \
        --tunning_epochs 54 \
        --last_epoch -1 \
        --pruning_strategy gmp \
        --skip_params_type exclude_conv1x1 > ${log_path}/dy_unstructured_prune_ratio_gmp 2>&1
    # The '>' above was previously missing: the log path was handed to train.py
    # as a stray positional argument and no log file was written for
    # print_info to rename.
    print_info $? dy_unstructured_prune_ratio_gmp
}
##################
all_prune() {
    # Run every pruning demo group in sequence.
    # Original size notes: 7 models for the first two groups, 4 models for
    # demo_unstructured_prune.
    demo_prune
    demo_dygraph_pruning
    demo_unstructured_prune
    demo_dygraph_unstructured_pruning
}
#4 nas
demo_nas(){
    # 4.1 sa_nas_mobilenetv2: run sa_nas_mobilenetv2.py for a single search
    # step on the device(s) in cudaid1, logging to ${log_path}/${model}.
    cd ${slim_dir}/demo/nas || catchException demo_nas

    model=demo_nas_sa_nas_v2_T_1card
    local -a search_opts=(--search_steps 1 --port 8881)
    CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py "${search_opts[@]}" >${log_path}/${model} 2>&1
    print_info $? ${model}
}
demo_nas4(){
    # Extended NAS demo: runs the sa_nas, block_sa_nas and rl_nas MobileNetV2
    # search scripts one after another on the device(s) in cudaid1. Each run
    # uses its own port and logs to ${log_path}/${model}.
    cd ${slim_dir}/demo/nas || catchException demo_nas4

    # 4.1 sa_nas_mobilenetv2
    model=sa_nas_v2_T_1card
    local -a sa_opts=(--search_steps 1 --retain_epoch 1 --port 8881)
    CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py "${sa_opts[@]}" >${log_path}/${model} 2>&1
    print_info $? ${model}

    # 4.2 block_sa_nas_mobilenetv2
    model=block_sa_nas_v2_T_1card
    local -a block_opts=(--search_steps 1 --port 8883)
    CUDA_VISIBLE_DEVICES=${cudaid1} python block_sa_nas_mobilenetv2.py "${block_opts[@]}" >${log_path}/${model} 2>&1
    print_info $? ${model}

    # 4.3 rl_nas
    model=rl_nas_v2_T_1card
    local -a rl_opts=(--search_steps 1 --port 8885)
    CUDA_VISIBLE_DEVICES=${cudaid1} python rl_nas_mobilenetv2.py "${rl_opts[@]}" >${log_path}/${model} 2>&1
    print_info $? ${model}

    # 4.4 parl_nas (currently disabled)
    #model=parl_nas_v2_T_1card
    #CUDA_VISIBLE_DEVICES=${cudaid1} python parl_nas_mobilenetv2.py \
    #--search_steps 1 --port 8887 >${log_path}/${model} 2>&1
    #print_info $? ${model}
}
all_nas() {
    # NAS case group. The original note says "3 models"; only demo_nas is
    # invoked here.
    demo_nas
}
# 5 darts
# search 1card # DARTS first-order approximation search method
demo_darts(){
    # DARTS demo: one epoch of search.py followed by one epoch of train.py
    # with the PC_DARTS architecture, both on the device(s) in cudaid1.
    cd ${slim_dir}/demo/darts || catchException demo_darts

    # Search
    model=darts1_search_1card
    local -a search_opts=(--epochs 1 --use_multiprocess False --batch_size 32)
    CUDA_VISIBLE_DEVICES=${cudaid1} python search.py "${search_opts[@]}" >${log_path}/${model} 2>&1
    print_info $? ${model}

    # Train
    model=pcdarts_train_1card
    local -a train_opts=(--arch='PC_DARTS' --epochs 1 --use_multiprocess False --batch_size 32)
    CUDA_VISIBLE_DEVICES=${cudaid1} python train.py "${train_opts[@]}" >${log_path}/${model} 2>&1
    print_info $? ${model}

    # Visualization (currently disabled)
    #pip install graphviz
    #model=slim_darts_visualize_pcdarts
    #python visualize.py PC_DARTS > ${log_path}/${model} 2>&1
    #print_info $? ${model}
}
slimfacenet(){
    # SlimFaceNet demo: links the CASIA and lfw datasets into the demo
    # directory, then runs train_eval.py with the train, quant and test
    # actions in turn on the device(s) in cudaid1.
    cd ${slim_dir}/demo/slimfacenet || catchException slimfacenet

    local dataset
    for dataset in CASIA lfw; do
        ln -s ${data_path}/slim/slimfacenet/${dataset} ${dataset}
    done

    # Dataset locations passed to every train_eval.py invocation.
    local -a data_dirs=(--train_data_dir=./CASIA/ --test_data_dir=./lfw/)

    model=slim_slimfacenet_B75_train
    CUDA_VISIBLE_DEVICES=${cudaid1} python -u train_eval.py \
        "${data_dirs[@]}" \
        --action train --model=SlimFaceNet_B_x0_75 \
        --start_epoch 0 --total_epoch 1 >${log_path}/${model} 2>&1
    print_info $? ${model}

    model=slim_slimfacenet_B75_quan
    CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
        --action quant "${data_dirs[@]}" >${log_path}/${model} 2>&1
    print_info $? ${model}

    model=slim_slimfacenet_B75_eval
    CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
        --action test "${data_dirs[@]}" >${log_path}/${model} 2>&1
    print_info $? ${model}
}
all_darts() {
    # DARTS case group (2 models).
    demo_darts
    # slimfacenet is intentionally not run here; the original note marks it
    # for removal.
}
demo_latency(){
    # Latency predictor demo: runs latency_predictor.py for mobilenet_v1 and
    # mobilenet_v2, each with the fp32 and int8 data types, logging every run
    # to ${log_path}/latency_<network>_<data type>.
    cd ${slim_dir}/demo/analysis || catchException demo_latency

    local network precision
    for network in mobilenet_v1 mobilenet_v2; do
        for precision in fp32 int8; do
            model=latency_${network}_${precision}
            python latency_predictor.py --model ${network} --data_type ${precision} >${log_path}/${model} 2>&1
            print_info $? ${model}
        done
    done
}
all_latency() {
    # Latency case group: currently just the latency predictor demo.
    demo_latency
}
####################################
# Case groups executed by this run, in order. Each entry is the name of a
# shell function defined earlier in this script.
export all_case_list=(all_distillation all_quant all_prune all_nas )
export all_case_time=0

# Estimated duration (minutes) per case group. This MUST be declared as an
# associative array under the same name that is assigned below; otherwise bash
# creates an indexed array, every string key collapses to index 0, and both
# the total and the per-case times are wrong.
declare -A all_case_dic
all_case_dic=(["all_distillation"]=5 ["all_quant"]=15 ["all_prune"]=1 ["all_nas"]=30 ["all_darts"]=30 ['unstructured_prune']=15 ['dy_qat1']=1)

# Sum the estimates of every entry in the table.
for key in "${!all_case_dic[@]}";do
    all_case_time=$(( all_case_time + ${all_case_dic[$key]} ))
done

set -e
echo -e "\033[35m ---- P0case_list length: ${#all_case_list[*]}, cases: ${all_case_list[*]} \033[0m"
echo -e "\033[35m ---- P0case_time: $all_case_time min \033[0m"
set +e
####################################
echo -e "\033[35m ---- start run case \033[0m"
case_num=1
for model in ${all_case_list[*]};do
    # Look the task time up in the estimate table (not in the case list).
    echo -e "\033[35m ---- running P0case $case_num/${#all_case_list[*]}: ${model} , task time: ${all_case_dic[${model}]} min \033[0m"
    ${model}
    case_num=$(( case_num + 1 ))
done
echo -e "\033[35m ---- end run case \033[0m"

# Fail the whole run if any log file in the logs directory has FAIL in its
# name. stderr is silenced so that "no such file" noise is not printed when
# nothing failed.
cd ${slim_dir}/logs
FF=$(ls *FAIL* 2>/dev/null | wc -l)
if [ "${FF}" -gt "0" ];then
    exit 1
else
    exit 0
fi
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册