Unverified commit 76356ff6, authored by iamWHTWD, committed by GitHub

Update sa_nas_mobilenetv2.py

Parent: fc0882b8
import sys
sys.path.append('..')
import numpy as np
import argparse
import ast
import time
import logging
import paddle
import paddle.nn as nn
import paddle.static as static
import paddle.nn.functional as F
import paddle.vision.transforms as T
from paddle import ParamAttr
from paddleslim.analysis import flops
from paddleslim.nas import SANAS
from paddleslim.common import get_logger
from optimizer import create_optimizer
import imagenet_reader
_logger = get_logger(__name__, level=logging.INFO)
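
# Overview: each SA-NAS search step samples a MobileNetV2-style architecture,
# builds static train/test programs for it, skips candidates whose FLOPs exceed
# the budget, trains the remaining ones for `retain_epoch` epochs, and reports
# the averaged top-1 validation accuracy back to the controller as the reward.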
def build_program(main_program,
                  startup_program,
                  image_shape,
                  dataset,
                  archs,
                  args,
                  places,
                  is_test=False):
    with static.program_guard(main_program, startup_program):
        with paddle.utils.unique_name.guard():
            data_shape = [None] + image_shape
            data = static.data(name='data', shape=data_shape, dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')
            if args.data == 'cifar10':
                paddle.assign(paddle.reshape(label, [-1, 1]), label)
            if is_test:
                data_loader = paddle.io.DataLoader(
                    dataset,
                    places=places,
                    feed_list=[data, label],
                    drop_last=False,
                    batch_size=args.batch_size,
                    return_list=False,
                    shuffle=False)
            else:
                data_loader = paddle.io.DataLoader(
                    dataset,
                    places=places,
                    feed_list=[data, label],
                    drop_last=True,
                    batch_size=args.batch_size,
                    return_list=False,
                    shuffle=True,
                    use_shared_memory=True,
                    num_workers=4)
            output = archs(data)
            output = static.nn.fc(x=output, size=args.class_dim)
            softmax_out = F.softmax(output)
            cost = F.cross_entropy(softmax_out, label=label)
            avg_cost = paddle.mean(cost)
            acc_top1 = paddle.metric.accuracy(
                input=softmax_out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(
                input=softmax_out, label=label, k=5)
            if is_test == False:
                optimizer = create_optimizer(args)
                optimizer.minimize(avg_cost)
    return data_loader, avg_cost, acc_top1, acc_top5

def search_mobilenetv2(config, args, image_size, is_server=True):
    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(
            mode='train', transform=transform, backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(
            mode='test', transform=transform, backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]
    if is_server:
        ### start a server and a client
        sa_nas = SANAS(
            config,
            server_addr=(args.server_address, args.port),
            search_steps=args.search_steps,
            is_server=True)
    else:
        ### start a client
        sa_nas = SANAS(
            config,
            server_addr=(args.server_address, args.port),
            search_steps=args.search_steps,
            is_server=False)

    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]

        train_program = static.Program()
        test_program = static.Program()
        startup_program = static.Program()
        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
            train_program, startup_program, image_shape, train_dataset, archs,
            args, places)

        current_flops = flops(train_program)
        print('step: {}, current_flops: {}'.format(step, current_flops))
        if current_flops > int(321208544):
            continue

        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
            test_program,
            startup_program,
            image_shape,
            val_dataset,
            archs,
            args,
            place,
            is_test=True)
        test_program = test_program.clone(for_test=True)

        exe = static.Executor(place)
        exe.run(startup_program)

        build_strategy = static.BuildStrategy()
        train_compiled_program = static.CompiledProgram(
            train_program).with_data_parallel(
                loss_name=avg_cost.name, build_strategy=build_strategy)
        for epoch_id in range(args.retain_epoch):
            for batch_id, data in enumerate(train_loader()):
                fetches = [avg_cost.name]
                s_time = time.time()
                outs = exe.run(train_compiled_program,
                               feed=data,
                               fetch_list=fetches)[0]
                batch_time = time.time() - s_time
                if batch_id % 10 == 0:
                    _logger.info(
                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
                        format(step, epoch_id, batch_id, outs[0], batch_time))

        reward = []
        for batch_id, data in enumerate(test_loader()):
            test_fetches = [
                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
            ]
            batch_reward = exe.run(test_program,
                                   feed=data,
                                   fetch_list=test_fetches)
            reward_avg = np.mean(np.array(batch_reward), axis=1)
            reward.append(reward_avg)

            _logger.info(
                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
                format(step, batch_id, batch_reward[0], batch_reward[1],
                       batch_reward[2]))

        finally_reward = np.mean(np.array(reward), axis=0)
        _logger.info(
            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
                finally_reward[0], finally_reward[1], finally_reward[2]))

        sa_nas.reward(float(finally_reward[1]))

def test_search_result(tokens, image_size, args, config):
    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    place = places[0]

    sa_nas = SANAS(
        config,
        server_addr=(args.server_address, args.port),
        search_steps=args.search_steps,
        is_server=True)

    image_shape = [3, image_size, image_size]
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(
            mode='train', transform=transform, backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(
            mode='test', transform=transform, backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')

    archs = sa_nas.tokens2arch(tokens)[0]

    train_program = static.Program()
    test_program = static.Program()
    startup_program = static.Program()
    train_loader, avg_cost, acc_top1, acc_top5 = build_program(
        train_program, startup_program, image_shape, train_dataset, archs, args,
        places)

    current_flops = flops(train_program)
    print('current_flops: {}'.format(current_flops))
    test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
        test_program,
        startup_program,
        image_shape,
        val_dataset,
        archs,
        args,
        place,
        is_test=True)
    test_program = test_program.clone(for_test=True)

    exe = static.Executor(place)
    exe.run(startup_program)

    build_strategy = static.BuildStrategy()
    train_compiled_program = static.CompiledProgram(
        train_program).with_data_parallel(
            loss_name=avg_cost.name, build_strategy=build_strategy)
    for epoch_id in range(args.retain_epoch):
        for batch_id, data in enumerate(train_loader()):
            fetches = [avg_cost.name]
            s_time = time.time()
            outs = exe.run(train_compiled_program,
                           feed=data,
                           fetch_list=fetches)[0]
            batch_time = time.time() - s_time
            if batch_id % 10 == 0:
                _logger.info(
                    'TRAIN: epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
                    format(epoch_id, batch_id, outs[0], batch_time))

    reward = []
    for batch_id, data in enumerate(test_loader()):
        test_fetches = [
            test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
        ]
        batch_reward = exe.run(test_program,
                               feed=data,
                               fetch_list=test_fetches)
        reward_avg = np.mean(np.array(batch_reward), axis=1)
        reward.append(reward_avg)

        _logger.info(
            'TEST: batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
            format(batch_id, batch_reward[0], batch_reward[1],
                   batch_reward[2]))

    finally_reward = np.mean(np.array(reward), axis=0)
    _logger.info(
        'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
            finally_reward[0], finally_reward[1], finally_reward[2]))

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='SA NAS MobileNetV2 cifar10 argparse')
    parser.add_argument(
        '--use_gpu',
        type=ast.literal_eval,
        default=True,
        help='Whether to use GPU in train/test model.')
    parser.add_argument(
        '--batch_size', type=int, default=256, help='batch size.')
    parser.add_argument(
        '--class_dim', type=int, default=10, help='number of classes.')
    parser.add_argument(
        '--data',
        type=str,
        default='cifar10',
        choices=['cifar10', 'imagenet'],
        help='dataset name.')
    parser.add_argument(
        '--is_server',
        type=ast.literal_eval,
        default=True,
        help='Whether to start a server.')
    parser.add_argument(
        '--search_steps',
        type=int,
        default=100,
        help='number of search steps.')
    parser.add_argument(
        '--server_address', type=str, default="", help='server ip.')
    parser.add_argument('--port', type=int, default=8881, help='server port')
    parser.add_argument(
        '--retain_epoch', type=int, default=5, help='epochs for each token.')
    parser.add_argument('--lr', type=float, default=0.1, help='learning rate.')
    args = parser.parse_args()
    print(args)

    if args.data == 'cifar10':
        image_size = 32
        block_num = 3
    elif args.data == 'imagenet':
        image_size = 224
        block_num = 6
    else:
        raise NotImplementedError(
            'data must in [cifar10, imagenet], but received: {}'.format(
                args.data))

    config = [('MobileNetV2Space')]
    paddle.enable_static()
    search_mobilenetv2(config, args, image_size, is_server=args.is_server)
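
# Example invocation (matches the nas CI case later in this page; a one-step
# smoke-test search):
#   python sa_nas_mobilenetv2.py --search_steps 1 --retain_epoch 1 --port 8881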
#!/usr/bin/env bash
##################
#bash slim_ci_demo_all_case.sh $5 $6;
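# Expects ${slim_dir} (PaddleSlim repo root), ${log_path} (log directory) and
# ${data_path} (dataset root) to be exported by the calling CI job; they are
# referenced throughout this script but never set here.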
print_info(){
if [ $1 -ne 0 ];then
mv ${log_path}/$2 ${log_path}/FAIL_$2.log
echo -e "\033[31m ${log_path}/FAIL_$2 \033[0m"
echo "fail log as follow"
cat ${log_path}/FAIL_$2.log
else
mv ${log_path}/$2 ${log_path}/SUCCESS_$2.log
echo -e "\033[32m ${log_path}/SUCCESS_$2 \033[0m"
cat ${log_path}/SUCCESS_$2.log
fi
}
catchException() {
echo $1 failed due to exception >> FAIL_Exception.log
}
cudaid1=$1;
cudaid2=$2;
echo "cudaid1,cudaid2", ${cudaid1}, ${cudaid2}
export CUDA_VISIBLE_DEVICES=${cudaid1}
# distributed-training log output mode
export PADDLE_LOG_LEVEL=debug
export FLAGS_fraction_of_gpu_memory_to_use=0.98
# data PaddleSlim/demo/data/ILSVRC2012
cd ${slim_dir}/demo
if [ -d "data" ];then
rm -rf data
fi
wget -q https://sys-p0.bj.bcebos.com/slim_ci/ILSVRC2012_data_demo.tar.gz --no-check-certificate
tar xf ILSVRC2012_data_demo.tar.gz
mv ILSVRC2012_data_demo data
# download pretrain model
root_url="http://paddle-imagenet-models-name.bj.bcebos.com"
pre_models="MobileNetV1 MobileNetV2 MobileNetV3_large_x1_0_ssld ResNet101_vd MobileNetV2 ResNet34 ResNet50 ResNet50_vd"
if [ -d "pretrain" ];then
rm -rf pretrain
fi
mkdir pretrain && cd pretrain
for model in ${pre_models}
do
if [ ! -f ${model} ]; then
wget -q ${root_url}/${model}_pretrained.tar
tar xf ${model}_pretrained.tar
fi
done
# 1 dist
demo_distillation_01(){
cd ${slim_dir}/demo/distillation || catchException demo_distillation
if [ -d "output" ];then
rm -rf output
fi
export CUDA_VISIBLE_DEVICES=${cudaid1}
python distill.py --num_epochs 1 --save_inference True >${log_path}/demo_distillation_ResNet50_vd_T 2>&1
print_info $? demo_distillation_ResNet50_vd_T
}
demo_distillation_02(){
cd ${slim_dir}/demo/distillation || catchException demo_distillation
if [ -d "output" ];then
rm -rf output
fi
export CUDA_VISIBLE_DEVICES=${cudaid1}
python distill.py --num_epochs 1 --batch_size 64 --save_inference True \
--model ResNet50 --teacher_model ResNet101_vd \
--teacher_pretrained_model ../pretrain/ResNet101_vd_pretrained >${log_path}/demo_distillation_ResNet101_vd_ResNet50_T 2>&1
print_info $? demo_distillation_ResNet101_vd_ResNet50_T
python distill.py --num_epochs 1 --batch_size 64 --save_inference True \
--model MobileNetV2_x0_25 --teacher_model MobileNetV2 \
--teacher_pretrained_model ../pretrain/MobileNetV2_pretrained >${log_path}/demo_distillation_MobileNetV2_MobileNetV2_x0_25_T 2>&1
print_info $? demo_distillation_MobileNetV2_MobileNetV2_x0_25_T
}
demo_deep_mutual_learning(){
cd ${slim_dir}/demo/deep_mutual_learning || catchException demo_deep_mutual_learning
export CUDA_VISIBLE_DEVICES=${cudaid1}
model=dml_mv1_mv1_gpu1
CUDA_VISIBLE_DEVICES=${cudaid1}
python dml_train.py --epochs 1 >${log_path}/${model} 2>&1
print_info $? ${model}
model=dml_mv1_res50_gpu1
CUDA_VISIBLE_DEVICES=${cudaid1}
python dml_train.py --models='mobilenet-resnet50' --batch_size 128 --epochs 1 >${log_path}/${model} 2>&1
print_info $? ${model}
}
all_distillation(){ # large dataset, 5 models
demo_distillation_01 # 3
#demo_distillation_02
#demo_deep_mutual_learning # 2
}
# 2.1 quant/quant_aware (a small dataset is enough)
demo_quant_quant_aware(){
cd ${slim_dir}/demo/quant/quant_aware || catchException demo_quant_quant_aware
if [ -d "output" ];then
rm -rf output
fi
export CUDA_VISIBLE_DEVICES=${cudaid1}
# With release 2.1 the default batch size of 256 runs out of GPU memory, so it is temporarily reduced to 128
python train.py --model MobileNet --pretrained_model ../../pretrain/MobileNetV1_pretrained \
--checkpoint_dir ./output/mobilenetv1 --num_epochs 1 --batch_size 128 >${log_path}/demo_quant_quant_aware_v1 2>&1
print_info $? demo_quant_quant_aware_v1
export CUDA_VISIBLE_DEVICES=${cudaid1}
python train.py --model ResNet34 \
--pretrained_model ../../pretrain/ResNet34_pretrained \
--checkpoint_dir ./output/ResNet34 --num_epochs 1 >${log_path}/demo_quant_quant_aware_ResNet34_T 2>&1
print_info $? demo_quant_quant_aware_ResNet34_T
}
# 2.2 quant/quant_embedding
demo_quant_quant_embedding(){
cd ${slim_dir}/demo/quant/quant_embedding || catchException demo_quant_quant_embedding
export CUDA_VISIBLE_DEVICES=${cudaid1}
# First train one pass on the word2vec demo data, then compare the infer results before and after quantization
if [ -d "data" ];then
rm -rf data
fi
wget -q https://sys-p0.bj.bcebos.com/slim_ci/word_2evc_demo_data.tar.gz --no-check-certificate
tar xf word_2evc_demo_data.tar.gz
mv word_2evc_demo_data data
if [ -d "v1_cpu5_b100_lr1dir" ];then
rm -rf v1_cpu5_b100_lr1dir
fi
OPENBLAS_NUM_THREADS=1 CPU_NUM=5 python train.py --train_data_dir data/convert_text8 \
--dict_path data/test_build_dict --num_passes 1 --batch_size 100 --model_output_dir v1_cpu5_b100_lr1dir \
--base_lr 1.0 --print_batch 1000 --with_speed --is_sparse >${log_path}/quant_em_word2vec_T 2>&1
print_info $? quant_em_word2vec_T
# infer before quantization
python infer.py --infer_epoch --test_dir data/test_mid_dir \
--dict_path data/test_build_dict_word_to_id_ \
--batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ \
--start_index 0 --last_index 0 >${log_path}/quant_em_infer1 2>&1
print_info $? quant_em_infer1
# infer after quantization
python infer.py --infer_epoch --test_dir data/test_mid_dir \
--dict_path data/test_build_dict_word_to_id_ \
--batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ --start_index 0 \
--last_index 0 --emb_quant True >${log_path}/quant_em_infer2 2>&1
print_info $? quant_em_infer2
}
# 2.3 quant_post # small dataset
demo_quant_quant_post(){
# 2021-04-25: added 4 new post-training quantization algorithms
cd ${slim_dir}/demo/quant/quant_post || catchException demo_quant_quant_post
export CUDA_VISIBLE_DEVICES=${cudaid1}
# 1 export the model
python export_model.py --model "MobileNet" --pretrained_model ../../pretrain/MobileNetV1_pretrained \
--data imagenet >${log_path}/st_quant_post_v1_export 2>&1
print_info $? st_quant_post_v1_export
# eval before quantization
python eval.py --model_path ./inference_model/MobileNet --model_name model \
--params_name weights >${log_path}/st_quant_post_v1_eval1 2>&1
print_info $? st_quant_post_v1_eval1
# 3 post-training quantization
# 4 eval after quantization
for algo in hist avg mse
do
## post-training quantization without bias correction (bc)
echo "quant_post train no bc " ${algo}
python quant_post.py --model_path ./inference_model/MobileNet \
--save_path ./quant_model/${algo}/MobileNet \
--model_filename model --params_filename weights --algo ${algo} >${log_path}/st_quant_post_v1_T_${algo} 2>&1
print_info $? st_quant_post_v1_T_${algo}
# eval after quantization
echo "quant_post eval no bc " ${algo}
python eval.py --model_path ./quant_model/${algo}/MobileNet --model_name __model__ \
--params_name __params__ > ${log_path}/st_quant_post_${algo}_eval2 2>&1
print_info $? st_quant_post_${algo}_eval2
# post-training quantization with bias correction (bc)
echo "quant_post train bc " ${algo}
python quant_post.py --model_path ./inference_model/MobileNet \
--save_path ./quant_model/${algo}_bc/MobileNet \
--model_filename model --params_filename weights \
--algo ${algo} --bias_correction True >${log_path}/st_quant_post_T_${algo}_bc 2>&1
print_info $? st_quant_post_T_${algo}_bc
# eval after quantization
echo "quant_post eval bc " ${algo}
python eval.py --model_path ./quant_model/${algo}_bc/MobileNet --model_name __model__ \
--params_name __params__ > ${log_path}/st_quant_post_${algo}_bc_eval2 2>&1
print_info $? st_quant_post_${algo}_bc_eval2
done
}
# 2.3 quant_post_hpo # small dataset
demo_quant_quant_post_hpo(){
cd ${slim_dir}/demo/quant/quant_post_hpo || catchException demo_quant_quant_post_hpo
export CUDA_VISIBLE_DEVICES=${cudaid1}
# 1. export the model
python ../quant_post/export_model.py \
--model "MobileNet" \
--pretrained_model ../../pretrain/MobileNetV1_pretrained \
--data imagenet > ${log_path}/st_quant_post__hpo_v1_export 2>&1
print_info $? st_quant_post__hpo_v1_export
# 2. quant_post_hpo with max_model_quant_count=2
python quant_post_hpo.py \
--use_gpu=True \
--model_path="./inference_model/MobileNet/" \
--save_path="./inference_model/MobileNet_quant/" \
--model_filename="model" \
--params_filename="weights" \
--max_model_quant_count=2 > ${log_path}/st_quant_post_hpo 2>&1
print_info $? st_quant_post_hpo
# 3. eval after quantization
python ../quant_post/eval.py \
--model_path ./inference_model/MobileNet_quant \
--model_name __model__ \
--params_name __params__ > ${log_path}/st_quant_post_hpo_eval 2>&1
print_info $? st_quant_post_hpo_eval
}
#2.4
demo_quant_pact_quant_aware(){
cd ${slim_dir}/demo/quant/pact_quant_aware || catchException demo_quant_pact_quant_aware
export CUDA_VISIBLE_DEVICES=${cudaid1}
# plain quantization; a small dataset is enough
# With release 2.1 the default batch size of 128 runs out of GPU memory, so it is temporarily reduced to 64
python train.py --model MobileNetV3_large_x1_0 \
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
--num_epochs 1 --lr 0.0001 --use_pact False --batch_size 128 >${log_path}/demo_quant_pact_quant_aware_v3_nopact 2>&1
print_info $? demo_quant_pact_quant_aware_v3_nopact
python train.py --model MobileNetV3_large_x1_0 \
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
--num_epochs 1 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \
--step_epochs 2 --l2_decay 1e-5 >${log_path}/demo_quant_pact_quant_aware_v3 2>&1
print_info $? demo_quant_pact_quant_aware_v3
# load
python train.py --model MobileNetV3_large_x1_0 \
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
--num_epochs 2 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \
--step_epochs 20 --l2_decay 1e-5 \
--checkpoint_dir ./output/MobileNetV3_large_x1_0/0 \
--checkpoint_epoch 0 >${log_path}/demo_quant_pact_quant_aware_v3_load 2>&1
print_info $? demo_quant_pact_quant_aware_v3_load
}
# 2.5
demo_dygraph_quant(){
cd ${slim_dir}/demo/dygraph/quant || catchException demo_dygraph_quant
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --model='mobilenet_v1' \
--pretrained_model '../../pretrain/MobileNetV1_pretrained' \
--num_epochs 1 \
--batch_size 128 \
> ${log_path}/dy_quant_v1_gpu1 2>&1
print_info $? dy_quant_v1_gpu1
# dy_pact_v3
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --lr=0.001 \
--batch_size 128 \
--use_pact=True --num_epochs=1 --l2_decay=2e-5 --ls_epsilon=0.1 \
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
--num_epochs 1 > ${log_path}/dy_pact_quant_v3_gpu1 2>&1
print_info $? dy_pact_quant_v3_gpu1
# multi-GPU training, using cards 0-3 as an example
CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
train.py --lr=0.001 \
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
--use_pact=True --num_epochs=1 \
--l2_decay=2e-5 \
--ls_epsilon=0.1 \
--batch_size=128 \
--model_save_dir output > ${log_path}/dy_pact_quant_v3_gpu4 2>&1
print_info $? dy_pact_quant_v3_gpu4
}
# 2.6
ce_tests_dygraph_qat(){
cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat
ln -s ${slim_dir}/demo/data/ILSVRC2012
test_samples=1000 # if set as -1, use all test samples
data_path='./ILSVRC2012/'
batch_size=16
epoch=1
lr=0.0001
num_workers=1
output_dir=$PWD/output_models
for model in mobilenet_v1
do
# if [ $1 == nopact ];then
# 1 quant train
echo "------1 nopact train--------", ${model}
export CUDA_VISIBLE_DEVICES=${cudaid1}
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--epoch=${epoch} \
--batch_size=32 \
--num_workers=${num_workers} \
--lr=${lr} \
--output_dir=${output_dir} \
--enable_quant > qat_${model}_gpu1_nw1 2>&1
# 2 eval before save quant
echo "--------2 eval before save quant -------------", ${model}
python ./src/eval.py \
--model_path=./output_models/quant_dygraph/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > eval_before_save_${model} 2>&1
# 3 To deploy the quantized model on CPU, convert it with the `test/save_quant_model.py` script.
echo "--------3 save_nopact_quant_model-------------", ${model}
python src/save_quant_model.py \
--load_model_path output_models/quant_dygraph/${model} \
--save_model_path int8_models/${model} > save_quant_${model} 2>&1
# 4
echo "--------4 CPU eval after save nopact quant -------------", ${model}
export CUDA_VISIBLE_DEVICES=
python ./src/eval.py \
--model_path=./int8_models/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1
# elif [ $1 == pact ];then
# 1 pact quant train
echo "------1 pact train--------", ${model}
export CUDA_VISIBLE_DEVICES=${cudaid1}
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--epoch=${epoch} \
--batch_size=32 \
--num_workers=${num_workers} \
--lr=${lr} \
--output_dir=$PWD/output_models_pact/ \
--enable_quant \
--use_pact > pact_qat_${model}_gpu1_nw1 2>&1
# 2 eval before save quant
echo "--------2 eval before save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./output_models_pact/quant_dygraph/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1
echo "--------3 save pact quant -------------", ${model}
python src/save_quant_model.py \
--load_model_path output_models_pact/quant_dygraph/${model} \
--save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1
echo "--------4 CPU eval after save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./int8_models_pact/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1
# fi
done
}
ce_tests_dygraph_qat4(){
cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat4
ln -s ${slim_dir}/demo/data/ILSVRC2012
test_samples=1000 # if set as -1, use all test samples
data_path='./ILSVRC2012/'
batch_size=16
epoch=1
lr=0.0001
num_workers=1
output_dir=$PWD/output_models
for model in mobilenet_v1
#for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
do
# if [ $1 == nopact ];then
# 1 quant train
echo "------1 nopact train--------", ${model}
export CUDA_VISIBLE_DEVICES=${cudaid1}
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--epoch=${epoch} \
--batch_size=32 \
--num_workers=${num_workers} \
--lr=${lr} \
--output_dir=${output_dir} \
--enable_quant > qat_${model}_gpu1_nw1 2>&1
# 2 eval before save quant
echo "--------2 eval before save quant -------------", ${model}
python ./src/eval.py \
--model_path=./output_models/quant_dygraph/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > eval_before_save_${model} 2>&1
# 3 To deploy the quantized model on CPU, convert it with the `test/save_quant_model.py` script.
echo "--------3 save_nopact_quant_model-------------", ${model}
python src/save_quant_model.py \
--load_model_path output_models/quant_dygraph/${model} \
--save_model_path int8_models/${model} > save_quant_${model} 2>&1
# 4
echo "--------4 CPU eval after save nopact quant -------------", ${model}
export CUDA_VISIBLE_DEVICES=
python ./src/eval.py \
--model_path=./int8_models/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1
# elif [ $1 == pact ];then
# 1 pact quant train
echo "------1 pact train--------", ${model}
export CUDA_VISIBLE_DEVICES=${cudaid1}
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--epoch=${epoch} \
--batch_size=32 \
--num_workers=${num_workers} \
--lr=${lr} \
--output_dir=$PWD/output_models_pact/ \
--enable_quant \
--use_pact > pact_qat_${model}_gpu1_nw1 2>&1
# 2 eval before save quant
echo "--------2 eval before save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./output_models_pact/quant_dygraph/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1
echo "--------3 save pact quant -------------", ${model}
python src/save_quant_model.py \
--load_model_path output_models_pact/quant_dygraph/${model} \
--save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1
echo "--------4 CPU eval after save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./int8_models_pact/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1
# fi
done
}
ce_tests_dygraph_ptq(){
cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_ptq4
ln -s ${slim_dir}/demo/data/ILSVRC2012
test_samples=1000 # if set as -1, use all test samples
data_path='./ILSVRC2012/'
batch_size=32
epoch=1
output_dir="./output_ptq"
quant_batch_num=10
quant_batch_size=10
for model in mobilenet_v1
#for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
do
echo "--------quantize model: ${model}-------------"
export CUDA_VISIBLE_DEVICES=${cudaid1}
# save ptq quant model
python ./src/ptq.py \
--data=${data_path} \
--arch=${model} \
--quant_batch_num=${quant_batch_num} \
--quant_batch_size=${quant_batch_size} \
--output_dir=${output_dir} > ${log_path}/ptq_${model} 2>&1
print_info $? ptq_${model}
echo "-------- eval fp32_infer model -------------", ${model}
python ./src/test.py \
--model_path=${output_dir}/${model}/fp32_infer \
--data_dir=${data_path} \
--batch_size=${batch_size} \
--use_gpu=True \
--test_samples=${test_samples} \
--ir_optim=False > ${log_path}/ptq_eval_fp32_${model} 2>&1
print_info $? ptq_eval_fp32_${model}
echo "-------- eval int8_infer model -------------", ${model}
python ./src/test.py \
--model_path=${output_dir}/${model}/int8_infer \
--data_dir=${data_path} \
--batch_size=${batch_size} \
--use_gpu=False \
--test_samples=${test_samples} \
--ir_optim=False > ${log_path}/ptq_eval_int8_${model} 2>&1
print_info $? ptq_eval_int8_${model}
done
}
# The release branch has no ce_tests_dygraph_ptq case; set is_develop="False" on the release branch
is_develop="True"
all_quant(){ # 10 models
if [ "${is_develop}" == "True" ];then
#ce_tests_dygraph_ptq4
ce_tests_dygraph_ptq
fi
demo_quant_quant_aware # 2 models
demo_quant_quant_embedding # 1 model
demo_quant_quant_post # 4 algorithms
demo_dygraph_quant # 2 models
demo_quant_pact_quant_aware # 1 model
ce_tests_dygraph_qat # 4 models
#ce_tests_dygraph_qat4
demo_quant_quant_post_hpo
}
# 3 prune
demo_prune(){
cd ${slim_dir}/demo/prune || catchException demo_prune
# 3.1 P0 prune
if [ -d "models" ];then
rm -rf models
fi
export CUDA_VISIBLE_DEVICES=${cudaid1}
python train.py --model "MobileNet" --pruned_ratio 0.31 --data "imagenet" \
--pretrained_model ../pretrain/MobileNetV1_pretrained/ --num_epochs 1 >${log_path}/prune_v1_T 2>&1
print_info $? prune_v1_T
#3.2 prune_fpgm
# slim_prune_fpgm_v1_T
# export CUDA_VISIBLE_DEVICES=${cudaid1}
# python train.py \
# --model="MobileNet" \
# --pretrained_model="../pretrain/MobileNetV1_pretrained" \
# --data="imagenet" \
# --pruned_ratio=0.3125 \
# --lr=0.1 \
# --num_epochs=1 \
# --test_period=1 \
# --step_epochs 30 60 90\
# --l2_decay=3e-5 \
# --lr_strategy="piecewise_decay" \
# --criterion="geometry_median" \
# --model_path="./fpgm_mobilenetv1_models" \
# --save_inference True >${log_path}/slim_prune_fpgm_v1_T 2>&1
# print_info $? slim_prune_fpgm_v1_T
#slim_prune_fpgm_v2_T
export CUDA_VISIBLE_DEVICES=${cudaid1}
#v2 -50%
python train.py \
--model="MobileNetV2" \
--pretrained_model="../pretrain/MobileNetV2_pretrained" \
--data="imagenet" \
--pruned_ratio=0.325 \
--lr=0.001 \
--num_epochs=2 \
--test_period=1 \
--step_epochs 30 60 80 \
--l2_decay=1e-4 \
--lr_strategy="piecewise_decay" \
--criterion="geometry_median" \
--model_path="./output/fpgm_mobilenetv2_models" \
--save_inference True >${log_path}/slim_prune_fpgm_v2_T 2>&1
print_info $? slim_prune_fpgm_v2_T
python eval.py --model "MobileNetV2" --data "imagenet" \
--model_path "./output/fpgm_mobilenetv2_models/0" >${log_path}/slim_prune_fpgm_v2_eval 2>&1
print_info $? slim_prune_fpgm_v2_eval
# ResNet34 -50
# export CUDA_VISIBLE_DEVICES=${cudaid1}
# python train.py \
# --model="ResNet34" \
# --pretrained_model="../pretrain/ResNet34_pretrained" \
# --data="imagenet" \
# --pruned_ratio=0.3125 \
# --lr=0.001 \
# --num_epochs=2 \
# --test_period=1 \
# --step_epochs 30 60 \
# --l2_decay=1e-4 \
# --lr_strategy="piecewise_decay" \
# --criterion="geometry_median" \
# --model_path="./output/fpgm_resnet34_50_models" \
# --save_inference True >${log_path}/slim_prune_fpgm_resnet34_50_T 2>&1
print_info $? slim_prune_fpgm_resnet34_50_T
python eval.py --model "ResNet34" --data "imagenet" \
--model_path "./output/fpgm_resnet34_50_models/0" >${log_path}/slim_prune_fpgm_resnet34_50_eval 2>&1
print_info $? slim_prune_fpgm_resnet34_50_eval
# ResNet34 -42 slim_prune_fpgm_resnet34_42_T
cd ${slim_dir}/demo/prune
export CUDA_VISIBLE_DEVICES=${cudaid1}
python train.py \
--model="ResNet34" \
--pretrained_model="../pretrain/ResNet34_pretrained" \
--data="imagenet" \
--pruned_ratio=0.25 \
--num_epochs=2 \
--test_period=1 \
--lr_strategy="cosine_decay" \
--criterion="geometry_median" \
--model_path="./output/fpgm_resnet34_025_120_models" \
--save_inference True >${log_path}/slim_prune_fpgm_resnet34_42_T 2>&1
print_info $? slim_prune_fpgm_resnet34_42_T
python eval.py --model "ResNet34" --data "imagenet" \
--model_path "./output/fpgm_resnet34_025_120_models/0" >${log_path}/slim_prune_fpgm_resnet34_42_eval 2>&1
print_info $? slim_prune_fpgm_resnet34_42_eval
# 3.3 prune ResNet50
export CUDA_VISIBLE_DEVICES=${cudaid1}
# With release 2.1 the default batch size of 256 runs out of GPU memory, so it is temporarily reduced to 128
python train.py --model ResNet50 --pruned_ratio 0.31 --data "imagenet" \
--save_inference True --pretrained_model ../pretrain/ResNet50_pretrained \
--num_epochs 1 --batch_size 128 >${log_path}/prune_ResNet50_T 2>&1
print_info $? prune_ResNet50_T
}
# 3.4 dygraph_prune
#dy_prune_ResNet34_f42
demo_dygraph_pruning(){
cd ${slim_dir}/demo/dygraph/pruning || catchException demo_dygraph_pruning
ln -s ${slim_dir}/demo/data data
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
--use_gpu=True \
--model="resnet34" \
--data="imagenet" \
--pruned_ratio=0.25 \
--num_epochs=1 \
--batch_size=128 \
--lr_strategy="cosine_decay" \
--criterion="fpgm" \
--model_path="./fpgm_resnet34_025_120_models" >${log_path}/dy_prune_ResNet34_f42_gpu1 2>&1
print_info $? dy_prune_ResNet34_f42_gpu1
#2.3 resume training by setting the checkpoint option:
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
--use_gpu=True \
--model="resnet34" \
--data="imagenet" \
--pruned_ratio=0.25 \
--num_epochs=2 \
--batch_size=128 \
--lr_strategy="cosine_decay" \
--criterion="fpgm" \
--model_path="./fpgm_resnet34_025_120_models" \
--checkpoint="./fpgm_resnet34_025_120_models/0" >${log_path}/dy_prune_ResNet34_f42_gpu1_load 2>&1
print_info $? dy_prune_ResNet34_f42_gpu1_load
#2.4 evaluate: run the eval.py script to measure the accuracy of the pruned and retrained model on the test data:
CUDA_VISIBLE_DEVICES=${cudaid1} python eval.py \
--checkpoint=./fpgm_resnet34_025_120_models/1 \
--model="resnet34" \
--pruned_ratio=0.25 \
--batch_size=128 >${log_path}/dy_prune_ResNet34_f42_gpu1_eval 2>&1
print_info $? dy_prune_ResNet34_f42_gpu1_eval
#2.5 export: run the following command to export the model for inference:
CUDA_VISIBLE_DEVICES=${cudaid1} python export_model.py \
--checkpoint=./fpgm_resnet34_025_120_models/final \
--model="resnet34" \
--pruned_ratio=0.25 \
--output_path=./infer_final/resnet > ${log_path}/dy_prune_ResNet34_f42_gpu1_export 2>&1
print_info $? dy_prune_ResNet34_f42_gpu1_export
#add dy_prune_fpgm_mobilenetv1_50_T
CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
--log_dir="fpgm_mobilenetv1_train_log" \
train.py \
--model="mobilenet_v1" \
--data="imagenet" \
--pruned_ratio=0.3125 \
--lr=0.1 \
--num_epochs=1 \
--test_period=1 \
--step_epochs 30 60 90\
--l2_decay=3e-5 \
--lr_strategy="piecewise_decay" \
--criterion="fpgm" \
--model_path="./fpgm_mobilenetv1_models" > ${log_path}/dy_prune_fpgm_mobilenetv1_50_T 2>&1
print_info $? dy_prune_fpgm_mobilenetv1_50_T
#add dy_prune_fpgm_mobilenetv2_50_T
# CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
# --log_dir="fpgm_mobilenetv2_train_log" \
# train.py \
# --model="mobilenet_v2" \
# --data="imagenet" \
# --pruned_ratio=0.325 \
# --lr=0.001 \
# --num_epochs=1 \
# --test_period=1 \
# --step_epochs 30 60 80\
# --l2_decay=1e-4 \
# --lr_strategy="piecewise_decay" \
# --criterion="fpgm" \
# --model_path="./fpgm_mobilenetv2_models" > ${log_path}/dy_prune_fpgm_mobilenetv2_50_T 2>&1
# print_info $? dy_prune_fpgm_mobilenetv2_50_T
#add
CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
--log_dir="fpgm_resnet34_f_42_train_log" \
train.py \
--use_gpu=True \
--model="resnet34" \
--data="imagenet" \
--pruned_ratio=0.25 \
--batch_size=128 \
--num_epochs=1 \
--test_period=1 \
--lr_strategy="cosine_decay" \
--criterion="fpgm" \
--model_path="./fpgm_resnet34_025_120_models" > ${log_path}/dy_prune_ResNet34_f42_gpu2 2>&1
print_info $? dy_prune_ResNet34_f42_gpu2
}
# 3.5 st unstructured_prune
demo_unstructured_prune(){
cd ${slim_dir}/demo/unstructured_prune || catchException demo_unstructured_prune
# Note: the batch_size in these commands is the total across all cards, i.e. 256 per card.
## sparsity: -30%, accuracy: 70%/89%
export CUDA_VISIBLE_DEVICES=${cudaid1}
python train.py \
--batch_size 256 \
--pretrained_model ../pretrain/MobileNetV1_pretrained \
--lr 0.05 \
--pruning_mode threshold \
--threshold 0.01 \
--data imagenet \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 1 \
--test_period 1 \
--model_period 1 \
--model_path st_unstructured_models >${log_path}/st_unstructured_prune_threshold_T 2>&1
print_info $? st_unstructured_prune_threshold_T
# eval
python evaluate.py \
--pruned_model=st_unstructured_models \
--data="imagenet" >${log_path}/st_unstructured_prune_threshold_eval 2>&1
print_info $? st_unstructured_prune_threshold_eval
## sparsity: -55%, accuracy: 67%+/87%+
export CUDA_VISIBLE_DEVICES=${cudaid1}
python train.py \
--batch_size 256 \
--pretrained_model ../pretrain/MobileNetV1_pretrained \
--lr 0.05 \
--pruning_mode ratio \
--ratio 0.55 \
--data imagenet \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 1 \
--test_period 1 \
--model_period 1 \
--model_path st_ratio_models >${log_path}/st_unstructured_prune_ratio_T 2>&1
print_info $? st_unstructured_prune_ratio_T
# MNIST dataset
# python train.py \
# --batch_size 256 \
# --pretrained_model ../pretrain/MobileNetV1_pretrained \
# --lr 0.05 \
# --pruning_mode threshold \
# --threshold 0.01 \
# --data mnist \
# --lr_strategy piecewise_decay \
# --step_epochs 1 2 3 \
# --num_epochs 1 \
# --test_period 1 \
# --model_period 1 \
# --model_path st_unstructured_models_mnist >${log_path}/st_unstructured_prune_threshold_mnist_T 2>&1
# print_info $? st_unstructured_prune_threshold_mnist_T
# eval
python evaluate.py \
--pruned_model=st_unstructured_models_mnist \
--data="mnist" >${log_path}/st_unstructured_prune_threshold_mnist_eval 2>&1
print_info $? st_unstructured_prune_threshold_mnist_eval
export CUDA_VISIBLE_DEVICES=${cudaid2}
python -m paddle.distributed.launch \
--log_dir="st_unstructured_prune_gmp_log" \
train.py \
--batch_size 64 \
--data imagenet \
--pruning_mode ratio \
--ratio 0.75 \
--lr 0.005 \
--model MobileNet \
--num_epochs 1 \
--test_period 5 \
--model_period 10 \
--pretrained_model ../pretrain/MobileNetV1_pretrained \
--model_path "./models" \
--step_epochs 71 88 \
--initial_ratio 0.15 \
--pruning_steps 5 \
--stable_epochs 0 \
--pruning_epochs 54 \
--tunning_epochs 54 \
--last_epoch -1 \
--prune_params_type conv1x1_only \
--pruning_strategy gmp > ${log_path}/st_unstructured_prune_ratio_gmp 2>&1
print_info $? st_unstructured_prune_ratio_gmp
}
demo_dygraph_unstructured_pruning(){
# dy_threshold
cd ${slim_dir}/demo/dygraph/unstructured_pruning || catchException demo_dygraph_unstructured_pruning
export CUDA_VISIBLE_DEVICES=${cudaid2}
## sparsity: -55%, accuracy: 67%+/87%+
python -m paddle.distributed.launch \
--log_dir train_dy_ratio_log train.py \
--data imagenet \
--lr 0.05 \
--pruning_mode ratio \
--ratio 0.55 \
--batch_size 256 \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 1 \
--test_period 1 \
--model_period 1 \
--model_path dy_ratio_models >${log_path}/dy_prune_ratio_T 2>&1
print_info $? dy_prune_ratio_T
## sparsity: -30%, accuracy: 70%/89%
export CUDA_VISIBLE_DEVICES=${cudaid2}
python -m paddle.distributed.launch \
--log_dir train_dy_threshold_log train.py \
--data imagenet \
--lr 0.05 \
--pruning_mode threshold \
--threshold 0.01 \
--batch_size 256 \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 1 \
--test_period 1 \
--model_period 1 \
--model_path dy_threshold_models >${log_path}/dy_threshold_prune_T 2>&1
print_info $? dy_threshold_prune_T
# eval
python evaluate.py --pruned_model dy_threshold_models/model.pdparams \
--data imagenet >${log_path}/dy_threshold_prune_eval 2>&1
print_info $? dy_threshold_prune_eval
# load
python -m paddle.distributed.launch \
--log_dir train_dy_threshold_load_log train.py \
--data imagenet \
--lr 0.05 \
--pruning_mode threshold \
--threshold 0.01 \
--batch_size 256 \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 3 \
--test_period 1 \
--model_period 1 \
--model_path dy_threshold_models_new \
--pretrained_model dy_threshold_models/model.pdparams \
--last_epoch 1 > ${log_path}/dy_threshold_prune_T_load 2>&1
print_info $? dy_threshold_prune_T_load
# cifar10
# python train.py --data cifar10 --lr 0.05 \
# --pruning_mode threshold \
# --threshold 0.01 \
# --model_period 1 \
# --num_epochs 2 >${log_path}/dy_threshold_prune_cifar10_T 2>&1
# print_info $? dy_threshold_prune_cifar10_T
export CUDA_VISIBLE_DEVICES=${cudaid2}
python -m paddle.distributed.launch \
--log_dir="dy_unstructured_prune_gmp_log" \
train.py \
--batch_size 64 \
--data imagenet \
--pruning_mode ratio \
--ratio 0.75 \
--lr 0.005 \
--num_epochs 1 \
--test_period 5 \
--model_period 10 \
--model_path "./models" \
--step_epochs 71 88 \
--initial_ratio 0.15 \
--pruning_steps 100 \
--stable_epochs 0 \
--pruning_epochs 54 \
--tunning_epochs 54 \
--last_epoch -1 \
--pruning_strategy gmp \
--skip_params_type exclude_conv1x1 > ${log_path}/dy_unstructured_prune_ratio_gmp 2>&1
print_info $? dy_unstructured_prune_ratio_gmp
}
##################
all_prune(){ # 7 models
demo_prune
demo_dygraph_pruning
demo_unstructured_prune # 4 models
demo_dygraph_unstructured_pruning
}
#4 nas
demo_nas(){
# 4.1 sa_nas_mobilenetv2
cd ${slim_dir}/demo/nas || catchException demo_nas
model=demo_nas_sa_nas_v2_T_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py --search_steps 1 --port 8881 >${log_path}/${model} 2>&1
print_info $? ${model}
}
demo_nas4(){
cd ${slim_dir}/demo/nas || catchException demo_nas4
model=sa_nas_v2_T_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py --search_steps 1 --retain_epoch 1 --port 8881 >${log_path}/${model} 2>&1
print_info $? ${model}
# 4.2 block_sa_nas_mobilenetv2
model=block_sa_nas_v2_T_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python block_sa_nas_mobilenetv2.py --search_steps 1 --port 8883 >${log_path}/${model} 2>&1
print_info $? ${model}
# 4.3 rl_nas
model=rl_nas_v2_T_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python rl_nas_mobilenetv2.py --search_steps 1 --port 8885 >${log_path}/${model} 2>&1
print_info $? ${model}
# 4.4 parl_nas
#model=parl_nas_v2_T_1card
#CUDA_VISIBLE_DEVICES=${cudaid1} python parl_nas_mobilenetv2.py \
#--search_steps 1 --port 8887 >${log_path}/${model} 2>&1
#print_info $? ${model}
}
all_nas(){ # 3 models
demo_nas
}
# 5 darts
# search 1card # DARTS first-order approximation search
demo_darts(){
cd ${slim_dir}/demo/darts || catchException demo_darts
model=darts1_search_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 \
--use_multiprocess False \
--batch_size 32 >${log_path}/${model} 2>&1
print_info $? ${model}
#train
model=pcdarts_train_1card
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --arch='PC_DARTS' \
--epochs 1 --use_multiprocess False \
--batch_size 32 >${log_path}/${model} 2>&1
print_info $? ${model}
# visualization
#pip install graphviz
#model=slim_darts_visualize_pcdarts
#python visualize.py PC_DARTS > ${log_path}/${model} 2>&1
#print_info $? ${model}
}
slimfacenet(){
cd ${slim_dir}/demo/slimfacenet || catchException slimfacenet
ln -s ${data_path}/slim/slimfacenet/CASIA CASIA
ln -s ${data_path}/slim/slimfacenet/lfw lfw
model=slim_slimfacenet_B75_train
CUDA_VISIBLE_DEVICES=${cudaid1} python -u train_eval.py \
--train_data_dir=./CASIA/ --test_data_dir=./lfw/ \
--action train --model=SlimFaceNet_B_x0_75 \
--start_epoch 0 --total_epoch 1 >${log_path}/slim_slimfacenet_B75_train 2>&1
print_info $? ${model}
model=slim_slimfacenet_B75_quan
CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
--action quant --train_data_dir=./CASIA/ \
--test_data_dir=./lfw/ >${log_path}/slim_slimfacenet_B75_quan 2>&1
print_info $? ${model}
model=slim_slimfacenet_B75_eval
CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
--action test --train_data_dir=./CASIA/ \
--test_data_dir=./lfw/ >${log_path}/slim_slimfacenet_B75_eval 2>&1
print_info $? ${model}
}
all_darts(){ # 2 models
demo_darts
#slimfacenet  # needs to be removed
}
demo_latency(){
cd ${slim_dir}/demo/analysis || catchException demo_latency
model=latency_mobilenet_v1_fp32
python latency_predictor.py --model mobilenet_v1 --data_type fp32 >${log_path}/${model} 2>&1
print_info $? ${model}
model=latency_mobilenet_v1_int8
python latency_predictor.py --model mobilenet_v1 --data_type int8 >${log_path}/${model} 2>&1
print_info $? ${model}
model=latency_mobilenet_v2_fp32
python latency_predictor.py --model mobilenet_v2 --data_type fp32 >${log_path}/${model} 2>&1
print_info $? ${model}
model=latency_mobilenet_v2_int8
python latency_predictor.py --model mobilenet_v2 --data_type int8 >${log_path}/${model} 2>&1
print_info $? ${model}
}
all_latency(){
demo_latency
}
####################################
export all_case_list=(all_distillation all_quant all_prune all_nas )
export all_case_time=0
declare -A all_case_dic
all_case_dic=(["all_distillation"]=5 ["all_quant"]=15 ["all_prune"]=1 ["all_nas"]=30 ["all_darts"]=30 ['unstructured_prune']=15 ['dy_qat1']=1)
for key in $(echo ${!all_case_dic[*]});do
all_case_time=`expr ${all_case_time} + ${all_case_dic[$key]}`
done
set -e
echo -e "\033[35m ---- P0case_list length: ${#all_case_list[*]}, cases: ${all_case_list[*]} \033[0m"
echo -e "\033[35m ---- P0case_time: $all_case_time min \033[0m"
set +e
####################################
echo -e "\033[35m ---- start run case \033[0m"
case_num=1
for model in ${all_case_list[*]};do
echo -e "\033[35m ---- running P0case $case_num/${#all_case_list[*]}: ${model} , task time: ${all_case_list[${model}]} min \033[0m"
${model}
let case_num++
done
echo -e "\033[35m ---- end run case \033[0m"
cd ${slim_dir}/logs
FF=`ls *FAIL*|wc -l`
if [ "${FF}" -gt "0" ];then
exit 1
else
exit 0
fi