未验证 提交 dc44c944 编写于 作者: I iamWHTWD 提交者: GitHub

Update sa_nas_mobilenetv2.py

上级 76356ff6
#!/usr/bin/env bash import sys
################## sys.path.append('..')
#bash slim_ci_demo_all_case.sh $5 $6; import numpy as np
import argparse
print_info(){ import ast
if [ $1 -ne 0 ];then import time
mv ${log_path}/$2 ${log_path}/FAIL_$2.log import argparse
echo -e "\033[31m ${log_path}/FAIL_$2 \033[0m" import ast
echo "fail log as follow" import logging
cat ${log_path}/FAIL_$2.log import paddle
else import paddle.nn as nn
mv ${log_path}/$2 ${log_path}/SUCCESS_$2.log import paddle.static as static
echo -e "\033[32m ${log_path}/SUCCESS_$2 \033[0m" import paddle.nn.functional as F
cat ${log_path}/SUCCESS_$2.log import paddle.vision.transforms as T
fi from paddle import ParamAttr
} from paddleslim.analysis import flops
from paddleslim.nas import SANAS
catchException() { from paddleslim.common import get_logger
echo $1 failed due to exception >> FAIL_Exception.log from optimizer import create_optimizer
} import imagenet_reader
cudaid1=$1; _logger = get_logger(__name__, level=logging.INFO)
cudaid2=$2;
echo "cudaid1,cudaid2", ${cudaid1}, ${cudaid2}
export CUDA_VISIBLE_DEVICES=${cudaid1} def build_program(main_program,
#分布式log输出方式 startup_program,
export PADDLE_LOG_LEVEL=debug image_shape,
dataset,
export FLAGS_fraction_of_gpu_memory_to_use=0.98 archs,
# data PaddleSlim/demo/data/ILSVRC2012 args,
cd ${slim_dir}/demo places,
if [ -d "data" ];then is_test=False):
rm -rf data with static.program_guard(main_program, startup_program):
fi with paddle.utils.unique_name.guard():
wget -q https://sys-p0.bj.bcebos.com/slim_ci/ILSVRC2012_data_demo.tar.gz --no-check-certificate data_shape = [None] + image_shape
tar xf ILSVRC2012_data_demo.tar.gz data = static.data(name='data', shape=data_shape, dtype='float32')
mv ILSVRC2012_data_demo data label = static.data(name='label', shape=[None, 1], dtype='int64')
# download pretrain model if args.data == 'cifar10':
root_url="http://paddle-imagenet-models-name.bj.bcebos.com" paddle.assign(paddle.reshape(label, [-1, 1]), label)
pre_models="MobileNetV1 MobileNetV2 MobileNetV3_large_x1_0_ssld ResNet101_vd MobileNetV2 ResNet34 ResNet50 ResNet50_vd" if is_test:
if [ -d "pretrain" ];then data_loader = paddle.io.DataLoader(
rm -rf pretrain dataset,
fi places=places,
mkdir pretrain && cd pretrain feed_list=[data, label],
for model in ${pre_models} drop_last=False,
do batch_size=args.batch_size,
if [ ! -f ${model} ]; then return_list=False,
wget -q ${root_url}/${model}_pretrained.tar shuffle=False)
tar xf ${model}_pretrained.tar else:
fi data_loader = paddle.io.DataLoader(
done dataset,
places=places,
# 1 dist feed_list=[data, label],
demo_distillation_01(){ drop_last=True,
cd ${slim_dir}/demo/distillation || catchException demo_distillation batch_size=args.batch_size,
if [ -d "output" ];then return_list=False,
rm -rf output shuffle=True,
fi use_shared_memory=True,
export CUDA_VISIBLE_DEVICES=${cudaid1} num_workers=4)
python distill.py --num_epochs 1 --save_inference True >${log_path}/demo_distillation_ResNet50_vd_T 2>&1 output = archs(data)
print_info $? demo_distillation_ResNet50_vd_T output = static.nn.fc(x=output, size=args.class_dim)
} softmax_out = F.softmax(output)
cost = F.cross_entropy(softmax_out, label=label)
demo_distillation_02(){ avg_cost = paddle.mean(cost)
cd ${slim_dir}/demo/distillation || catchException demo_distillation acc_top1 = paddle.metric.accuracy(
if [ -d "output" ];then input=softmax_out, label=label, k=1)
rm -rf output acc_top5 = paddle.metric.accuracy(
fi input=softmax_out, label=label, k=5)
export CUDA_VISIBLE_DEVICES=${cudaid1} if is_test == False:
python distill.py --num_epochs 1 --batch_size 64 --save_inference True \ optimizer = create_optimizer(args)
--model ResNet50 --teacher_model ResNet101_vd \ optimizer.minimize(avg_cost)
--teacher_pretrained_model ../pretrain/ResNet101_vd_pretrained >${log_path}/demo_distillation_ResNet101_vd_ResNet50_T 2>&1 return data_loader, avg_cost, acc_top1, acc_top5
print_info $? demo_distillation_ResNet101_vd_ResNet50_T
python distill.py --num_epochs 1 --batch_size 64 --save_inference True \ def search_mobilenetv2(config, args, image_size, is_server=True):
--model MobileNetV2_x0_25 --teacher_model MobileNetV2 \ image_shape = [3, image_size, image_size]
--teacher_pretrained_model ../pretrain/MobileNetV2_pretrained >${log_path}/demo_distillation_MobileNetV2_MobileNetV2_x0_25_T 2>&1 if args.data == 'cifar10':
print_info $? demo_distillation_MobileNetV2_MobileNetV2_x0_25_T transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
} train_dataset = paddle.vision.datasets.Cifar10(
mode='train', transform=transform, backend='cv2')
demo_deep_mutual_learning(){ val_dataset = paddle.vision.datasets.Cifar10(
cd ${slim_dir}/demo/deep_mutual_learning || catchException demo_deep_mutual_learning mode='test', transform=transform, backend='cv2')
export CUDA_VISIBLE_DEVICES=${cudaid1}
model=dml_mv1_mv1_gpu1 elif args.data == 'imagenet':
CUDA_VISIBLE_DEVICES=${cudaid1} train_dataset = imagenet_reader.ImageNetDataset(mode='train')
python dml_train.py --epochs 1 >${log_path}/${model} 2>&1 val_dataset = imagenet_reader.ImageNetDataset(mode='val')
print_info $? ${model}
model=dml_mv1_res50_gpu1 places = static.cuda_places() if args.use_gpu else static.cpu_places()
CUDA_VISIBLE_DEVICES=${cudaid1} place = places[0]
python dml_train.py --models='mobilenet-resnet50' --batch_size 128 --epochs 1 >${log_path}/${model} 2>&1 if is_server:
print_info $? ${model} ### start a server and a client
} sa_nas = SANAS(
config,
all_distillation(){ # 大数据 5个模型 server_addr=(args.server_address, args.port),
demo_distillation_01 # 3 search_steps=args.search_steps,
#demo_distillation_02 is_server=True)
#demo_deep_mutual_learning # 2 else:
} ### start a client
# 2.1 quant/quant_aware 使用小数据集即可 sa_nas = SANAS(
demo_quant_quant_aware(){ config,
cd ${slim_dir}/demo/quant/quant_aware || catchException demo_quant_quant_aware server_addr=(args.server_address, args.port),
if [ -d "output" ];then search_steps=args.search_steps,
rm -rf output is_server=False)
fi
export CUDA_VISIBLE_DEVICES=${cudaid1} for step in range(args.search_steps):
# 2.1版本时默认BS=256会报显存不足,故暂时修改成128 archs = sa_nas.next_archs()[0]
python train.py --model MobileNet --pretrained_model ../../pretrain/MobileNetV1_pretrained \
--checkpoint_dir ./output/mobilenetv1 --num_epochs 1 --batch_size 128 >${log_path}/demo_quant_quant_aware_v1 2>&1 train_program = static.Program()
print_info $? demo_quant_quant_aware_v1 test_program = static.Program()
startup_program = static.Program()
export CUDA_VISIBLE_DEVICES=${cudaid1} train_loader, avg_cost, acc_top1, acc_top5 = build_program(
python train.py --model ResNet34 \ train_program, startup_program, image_shape, train_dataset, archs,
--pretrained_model ../../pretrain/ResNet34_pretrained \ args, places)
--checkpoint_dir ./output/ResNet34 --num_epochs 1 >${log_path}/demo_quant_quant_aware_ResNet34_T 2>&1
print_info $? demo_quant_quant_aware_ResNet34_T current_flops = flops(train_program)
} print('step: {}, current_flops: {}'.format(step, current_flops))
# 2.2 quant/quant_embedding if current_flops > int(321208544):
demo_quant_quant_embedding(){ continue
cd ${slim_dir}/demo/quant/quant_embedding || catchException demo_quant_quant_embedding
export CUDA_VISIBLE_DEVICES=${cudaid1} test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
# 先使用word2vec的demo数据进行一轮训练,比较量化前infer结果同量化后infer结果different test_program,
if [ -d "data" ];then startup_program,
rm -rf data image_shape,
fi val_dataset,
wget -q https://sys-p0.bj.bcebos.com/slim_ci/word_2evc_demo_data.tar.gz --no-check-certificate archs,
tar xf word_2evc_demo_data.tar.gz args,
mv word_2evc_demo_data data place,
if [ -d "v1_cpu5_b100_lr1dir" ];then is_test=True)
rm -rf v1_cpu5_b100_lr1dir test_program = test_program.clone(for_test=True)
fi
OPENBLAS_NUM_THREADS=1 CPU_NUM=5 python train.py --train_data_dir data/convert_text8 \ exe = static.Executor(place)
--dict_path data/test_build_dict --num_passes 1 --batch_size 100 --model_output_dir v1_cpu5_b100_lr1dir \ exe.run(startup_program)
--base_lr 1.0 --print_batch 1000 --with_speed --is_sparse >${log_path}/quant_em_word2vec_T 2>&1
print_info $? quant_em_word2vec_T build_strategy = static.BuildStrategy()
# 量化前infer train_compiled_program = static.CompiledProgram(
python infer.py --infer_epoch --test_dir data/test_mid_dir \ train_program).with_data_parallel(
--dict_path data/test_build_dict_word_to_id_ \ loss_name=avg_cost.name, build_strategy=build_strategy)
--batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ \ for epoch_id in range(args.retain_epoch):
--start_index 0 --last_index 0 >${log_path}/quant_em_infer1 2>&1 for batch_id, data in enumerate(train_loader()):
print_info $? quant_em_infer1 fetches = [avg_cost.name]
# 量化后infer s_time = time.time()
python infer.py --infer_epoch --test_dir data/test_mid_dir \ outs = exe.run(train_compiled_program,
--dict_path data/test_build_dict_word_to_id_ \ feed=data,
--batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ --start_index 0 \ fetch_list=fetches)[0]
--last_index 0 --emb_quant True >${log_path}/quant_em_infer2 2>&1 batch_time = time.time() - s_time
print_info $? quant_em_infer2 if batch_id % 10 == 0:
} _logger.info(
# 2.3 quan_post # 小数据集 'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
demo_quant_quant_post(){ format(step, epoch_id, batch_id, outs[0], batch_time))
# 20210425 新增4种离线量化方法
cd ${slim_dir}/demo/quant/quant_post || catchException demo_quant_quant_post reward = []
export CUDA_VISIBLE_DEVICES=${cudaid1} for batch_id, data in enumerate(test_loader()):
# 1 导出模型 test_fetches = [
python export_model.py --model "MobileNet" --pretrained_model ../../pretrain/MobileNetV1_pretrained \ test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
--data imagenet >${log_path}/st_quant_post_v1_export 2>&1 ]
print_info $? st_quant_post_v1_export batch_reward = exe.run(test_program,
# 量化前eval feed=data,
python eval.py --model_path ./inference_model/MobileNet --model_name model \ fetch_list=test_fetches)
--params_name weights >${log_path}/st_quant_post_v1_eval1 2>&1 reward_avg = np.mean(np.array(batch_reward), axis=1)
print_info $? st_quant_post_v1_eval1 reward.append(reward_avg)
# 3 离线量化 _logger.info(
# 4 量化后eval 'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
for algo in hist avg mse format(step, batch_id, batch_reward[0], batch_reward[1],
do batch_reward[2]))
## 不带bc 离线量化
echo "quant_post train no bc " ${algo} finally_reward = np.mean(np.array(reward), axis=0)
python quant_post.py --model_path ./inference_model/MobileNet \ _logger.info(
--save_path ./quant_model/${algo}/MobileNet \ 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
--model_filename model --params_filename weights --algo ${algo} >${log_path}/st_quant_post_v1_T_${algo} 2>&1 finally_reward[0], finally_reward[1], finally_reward[2]))
print_info $? st_quant_post_v1_T_${algo}
# 量化后eval sa_nas.reward(float(finally_reward[1]))
echo "quant_post eval no bc " ${algo}
python eval.py --model_path ./quant_model/${algo}/MobileNet --model_name __model__ \
--params_name __params__ > ${log_path}/st_quant_post_${algo}_eval2 2>&1 def test_search_result(tokens, image_size, args, config):
print_info $? st_quant_post_${algo}_eval2 places = static.cuda_places() if args.use_gpu else static.cpu_places()
place = places[0]
# 带bc参数的 离线量化
echo "quant_post train bc " ${algo} sa_nas = SANAS(
python quant_post.py --model_path ./inference_model/MobileNet \ config,
--save_path ./quant_model/${algo}_bc/MobileNet \ server_addr=(args.server_address, args.port),
--model_filename model --params_filename weights \ search_steps=args.search_steps,
--algo ${algo} --bias_correction True >${log_path}/st_quant_post_T_${algo}_bc 2>&1 is_server=True)
print_info $? st_quant_post_T_${algo}_bc
image_shape = [3, image_size, image_size]
# 量化后eval if args.data == 'cifar10':
echo "quant_post eval bc " ${algo} transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
python eval.py --model_path ./quant_model/${algo}_bc/MobileNet --model_name __model__ \ train_dataset = paddle.vision.datasets.Cifar10(
--params_name __params__ > ${log_path}/st_quant_post_${algo}_bc_eval2 2>&1 mode='train', transform=transform, backend='cv2')
print_info $? st_quant_post_${algo}_bc_eval2 val_dataset = paddle.vision.datasets.Cifar10(
mode='test', transform=transform, backend='cv2')
done
} elif args.data == 'imagenet':
train_dataset = imagenet_reader.ImageNetDataset(mode='train')
# 2.3 quant_post_hpo # 小数据集 val_dataset = imagenet_reader.ImageNetDataset(mode='val')
demo_quant_quant_post_hpo(){
archs = sa_nas.tokens2arch(tokens)[0]
cd ${slim_dir}/demo/quant/quant_post_hpo || catchException demo_quant_quant_post_hpo
export CUDA_VISIBLE_DEVICES=${cudaid1} train_program = static.Program()
# 1.导出模型 test_program = static.Program()
python ../quant_post/export_model.py \ startup_program = static.Program()
--model "MobileNet" \ train_loader, avg_cost, acc_top1, acc_top5 = build_program(
--pretrained_model ../../pretrain/MobileNetV1_pretrained \ train_program, startup_program, image_shape, train_dataset, archs, args,
--data imagenet > ${log_path}/st_quant_post__hpo_v1_export 2>&1 places)
print_info $? st_quant_post__hpo_v1_export
# 2. quant_post_hpo 设置max_model_quant_count=2 current_flops = flops(train_program)
python quant_post_hpo.py \ print('current_flops: {}'.format(current_flops))
--use_gpu=True \ test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
--model_path="./inference_model/MobileNet/" \ test_program,
--save_path="./inference_model/MobileNet_quant/" \ startup_program,
--model_filename="model" \ image_shape,
--params_filename="weights" \ val_dataset,
--max_model_quant_count=2 > ${log_path}/st_quant_post_hpo 2>&1 archs,
print_info $? st_quant_post_hpo args,
# 3. 量化后eval place,
python ../quant_post/eval.py \ is_test=True)
--model_path ./inference_model/MobileNet_quant \
--model_name __model__ \ test_program = test_program.clone(for_test=True)
--params_name __params__ > ${log_path}/st_quant_post_hpo_eval 2>&1
print_info $? st_quant_post_hpo_eval exe = static.Executor(place)
exe.run(startup_program)
}
build_strategy = static.BuildStrategy()
#2.4 train_compiled_program = static.CompiledProgram(
demo_quant_pact_quant_aware(){ train_program).with_data_parallel(
cd ${slim_dir}/demo/quant/pact_quant_aware || catchException demo_quant_pact_quant_aware loss_name=avg_cost.name, build_strategy=build_strategy)
export CUDA_VISIBLE_DEVICES=${cudaid1} for epoch_id in range(args.retain_epoch):
# 普通量化,使用小数据集即可 for batch_id, data in enumerate(train_loader()):
# 2.1版本时默认BS=128 会报显存不足,故暂时修改成64 fetches = [avg_cost.name]
python train.py --model MobileNetV3_large_x1_0 \ s_time = time.time()
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ outs = exe.run(train_compiled_program,
--num_epochs 1 --lr 0.0001 --use_pact False --batch_size 128 >${log_path}/demo_quant_pact_quant_aware_v3_nopact 2>&1 feed=data,
print_info $? demo_quant_pact_quant_aware_v3_nopact fetch_list=fetches)[0]
python train.py --model MobileNetV3_large_x1_0 \ batch_time = time.time() - s_time
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ if batch_id % 10 == 0:
--num_epochs 1 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \ _logger.info(
--step_epochs 2 --l2_decay 1e-5 >${log_path}/demo_quant_pact_quant_aware_v3 2>&1 'TRAIN: epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
print_info $? demo_quant_pact_quant_aware_v3 format(epoch_id, batch_id, outs[0], batch_time))
# load
python train.py --model MobileNetV3_large_x1_0 \ reward = []
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ for batch_id, data in enumerate(test_loader()):
--num_epochs 2 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \ test_fetches = [
--step_epochs 20 --l2_decay 1e-5 \ test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
--checkpoint_dir ./output/MobileNetV3_large_x1_0/0 \ ]
--checkpoint_epoch 0 >${log_path}/demo_quant_pact_quant_aware_v3_load 2>&1 batch_reward = exe.run(test_program,
print_info $? demo_quant_pact_quant_aware_v3_load feed=data,
} fetch_list=test_fetches)
reward_avg = np.mean(np.array(batch_reward), axis=1)
# 2.5 reward.append(reward_avg)
demo_dygraph_quant(){
cd ${slim_dir}/demo/dygraph/quant || catchException demo_dygraph_quant _logger.info(
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --model='mobilenet_v1' \ 'TEST: batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
--pretrained_model '../../pretrain/MobileNetV1_pretrained' \ format(batch_id, batch_reward[0], batch_reward[1], batch_reward[
--num_epochs 1 \ 2]))
--batch_size 128 \
> ${log_path}/dy_quant_v1_gpu1 2>&1 finally_reward = np.mean(np.array(reward), axis=0)
print_info $? dy_quant_v1_gpu1 _logger.info(
# dy_pact_v3 'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --lr=0.001 \ finally_reward[0], finally_reward[1], finally_reward[2]))
--batch_size 128 \
--use_pact=True --num_epochs=1 --l2_decay=2e-5 --ls_epsilon=0.1 \
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ if __name__ == '__main__':
--num_epochs 1 > ${log_path}/dy_pact_quant_v3_gpu1 2>&1
print_info $? dy_pact_quant_v3_gpu1 parser = argparse.ArgumentParser(
# 多卡训练,以0到3号卡为例 description='SA NAS MobileNetV2 cifar10 argparase')
CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \ parser.add_argument(
train.py --lr=0.001 \ '--use_gpu',
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ type=ast.literal_eval,
--use_pact=True --num_epochs=1 \ default=True,
--l2_decay=2e-5 \ help='Whether to use GPU in train/test model.')
--ls_epsilon=0.1 \ parser.add_argument(
--batch_size=128 \ '--batch_size', type=int, default=256, help='batch size.')
--model_save_dir output > ${log_path}/dy_pact_quant_v3_gpu4 2>&1 parser.add_argument(
print_info $? dy_pact_quant_v3_gpu4 '--class_dim', type=int, default=10, help='classify number.')
} parser.add_argument(
# 2.6 '--data',
ce_tests_dygraph_qat(){ type=str,
cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat default='cifar10',
ln -s ${slim_dir}/demo/data/ILSVRC2012 choices=['cifar10', 'imagenet'],
test_samples=1000 # if set as -1, use all test samples help='server address.')
data_path='./ILSVRC2012/' parser.add_argument(
batch_size=16 '--is_server',
epoch=1 type=ast.literal_eval,
lr=0.0001 default=True,
num_workers=1 help='Whether to start a server.')
output_dir=$PWD/output_models parser.add_argument(
for model in mobilenet_v1 '--search_steps',
do type=int,
# if [ $1 == nopact ];then default=100,
# 1 quant train help='controller server number.')
echo "------1 nopact train--------", ${model} parser.add_argument(
export CUDA_VISIBLE_DEVICES=${cudaid1} '--server_address', type=str, default="", help='server ip.')
python ./src/qat.py \ parser.add_argument('--port', type=int, default=8881, help='server port')
--arch=${model} \ parser.add_argument(
--data=${data_path} \ '--retain_epoch', type=int, default=5, help='epoch for each token.')
--epoch=${epoch} \ parser.add_argument('--lr', type=float, default=0.1, help='learning rate.')
--batch_size=32 \ args = parser.parse_args()
--num_workers=${num_workers} \ print(args)
--lr=${lr} \
--output_dir=${output_dir} \ if args.data == 'cifar10':
--enable_quant > qat_${model}_gpu1_nw1 2>&1 image_size = 32
# 2 eval before save quant block_num = 3
echo "--------2 eval before save quant -------------", ${model} elif args.data == 'imagenet':
python ./src/eval.py \ image_size = 224
--model_path=./output_models/quant_dygraph/${model} \ block_num = 6
--data_dir=${data_path} \ else:
--test_samples=${test_samples} \ raise NotImplementedError(
--batch_size=${batch_size} > eval_before_save_${model} 2>&1 'data must in [cifar10, imagenet], but received: {}'.format(
# 3 CPU上部署量化模型,需要使用`test/save_quant_model.py`脚本进行模型转换。 args.data))
echo "--------3 save_nopact_quant_model-------------", ${model}
python src/save_quant_model.py \ config = [('MobileNetV2Space')]
--load_model_path output_models/quant_dygraph/${model} \ paddle.enable_static()
--save_model_path int8_models/${model} > save_quant_${model} 2>&1 search_mobilenetv2(config, args, image_size, is_server=args.is_server)
# 4
echo "--------4 CPU eval after save nopact quant -------------", ${model}
export CUDA_VISIBLE_DEVICES=
python ./src/eval.py \
--model_path=./int8_models/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1
# elif [ $1 == pact ];then
# 1 pact quant train
echo "------1 pact train--------", ${model}
export CUDA_VISIBLE_DEVICES=${cudaid1}
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--epoch=${epoch} \
--batch_size=32 \
--num_workers=${num_workers} \
--lr=${lr} \
--output_dir=$PWD/output_models_pact/ \
--enable_quant \
--use_pact > pact_qat_${model}_gpu1_nw1 2>&1
# 2 eval before save quant
echo "--------2 eval before save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./output_models_pact/quant_dygraph/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1
echo "--------3 save pact quant -------------", ${model}
python src/save_quant_model.py \
--load_model_path output_models_pact/quant_dygraph/${model} \
--save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1
echo "--------4 CPU eval after save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./int8_models_pact/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1
# fi
done
}
# Dygraph quantization-aware training (QAT) CE case.
# For every model in the loop it runs, in order: QAT without PACT, eval of the
# trained model, conversion through src/save_quant_model.py and eval of the
# converted int8 model; then the same four steps again with --use_pact.
# Reads the globals slim_dir and cudaid1. The output of every step is redirected
# to a file in the current directory (not ${log_path}) and print_info is not
# called, so a failing step is not reported by this function.
# NOTE(review): a function with this same name is also defined earlier in this
# file; being the later definition, this one is the one that takes effect.
ce_tests_dygraph_qat(){
cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat4
# Link the demo ImageNet data into the working directory as ./ILSVRC2012.
ln -s ${slim_dir}/demo/data/ILSVRC2012
test_samples=1000 # if set as -1, use all test samples
data_path='./ILSVRC2012/'
# batch_size is only passed to the eval steps; training hard-codes --batch_size=32.
batch_size=16
epoch=1
lr=0.0001
num_workers=1
output_dir=$PWD/output_models
for model in mobilenet_v1
#for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
do
# if [ $1 == nopact ];then
# 1 quant train
echo "------1 nopact train--------", ${model}
export CUDA_VISIBLE_DEVICES=${cudaid1}
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--epoch=${epoch} \
--batch_size=32 \
--num_workers=${num_workers} \
--lr=${lr} \
--output_dir=${output_dir} \
--enable_quant > qat_${model}_gpu1_nw1 2>&1
# 2 eval before save quant
echo "--------2 eval before save quant -------------", ${model}
python ./src/eval.py \
--model_path=./output_models/quant_dygraph/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > eval_before_save_${model} 2>&1
# 3 To deploy the quantized model on CPU, it has to be converted with the `test/save_quant_model.py` script.
echo "--------3 save_nopact_quant_model-------------", ${model}
python src/save_quant_model.py \
--load_model_path output_models/quant_dygraph/${model} \
--save_model_path int8_models/${model} > save_quant_${model} 2>&1
# 4
echo "--------4 CPU eval after save nopact quant -------------", ${model}
# Empty CUDA_VISIBLE_DEVICES hides every GPU for this eval.
export CUDA_VISIBLE_DEVICES=
python ./src/eval.py \
--model_path=./int8_models/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1
# elif [ $1 == pact ];then
# 1 pact quant train
echo "------1 pact train--------", ${model}
export CUDA_VISIBLE_DEVICES=${cudaid1}
python ./src/qat.py \
--arch=${model} \
--data=${data_path} \
--epoch=${epoch} \
--batch_size=32 \
--num_workers=${num_workers} \
--lr=${lr} \
--output_dir=$PWD/output_models_pact/ \
--enable_quant \
--use_pact > pact_qat_${model}_gpu1_nw1 2>&1
# 2 eval before save quant
echo "--------2 eval before save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./output_models_pact/quant_dygraph/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1
echo "--------3 save pact quant -------------", ${model}
python src/save_quant_model.py \
--load_model_path output_models_pact/quant_dygraph/${model} \
--save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1
# NOTE(review): unlike the no-PACT step 4, CUDA_VISIBLE_DEVICES is not cleared
# here, so this "CPU eval" runs with ${cudaid1} still visible - confirm intent.
echo "--------4 CPU eval after save pact quant -------------", ${model}
python ./src/eval.py \
--model_path=./int8_models_pact/${model} \
--data_dir=${data_path} \
--test_samples=${test_samples} \
--batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1
# fi
done
}
# Dygraph post-training quantization (PTQ) CE case.
# For every model in the loop it runs src/ptq.py, then src/test.py on the
# fp32_infer model with --use_gpu=True and on the int8_infer model with
# --use_gpu=False. Each step logs to ${log_path}/<case> and its exit status is
# handed to print_info.
# Reads the globals slim_dir, cudaid1 and log_path.
ce_tests_dygraph_ptq(){
cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_ptq4
# Link the demo ImageNet data into the working directory as ./ILSVRC2012.
ln -s ${slim_dir}/demo/data/ILSVRC2012
test_samples=1000 # if set as -1, use all test samples
data_path='./ILSVRC2012/'
batch_size=32
# NOTE(review): epoch is assigned but not used anywhere in this function.
epoch=1
output_dir="./output_ptq"
quant_batch_num=10
quant_batch_size=10
for model in mobilenet_v1
#for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
do
echo "--------quantize model: ${model}-------------"
export CUDA_VISIBLE_DEVICES=${cudaid1}
# save ptq quant model
python ./src/ptq.py \
--data=${data_path} \
--arch=${model} \
--quant_batch_num=${quant_batch_num} \
--quant_batch_size=${quant_batch_size} \
--output_dir=${output_dir} > ${log_path}/ptq_${model} 2>&1
print_info $? ptq_${model}
echo "-------- eval fp32_infer model -------------", ${model}
python ./src/test.py \
--model_path=${output_dir}/${model}/fp32_infer \
--data_dir=${data_path} \
--batch_size=${batch_size} \
--use_gpu=True \
--test_samples=${test_samples} \
--ir_optim=False > ${log_path}/ptq_eval_fp32_${model} 2>&1
print_info $? ptq_eval_fp32_${model}
echo "-------- eval int8_infer model -------------", ${model}
python ./src/test.py \
--model_path=${output_dir}/${model}/int8_infer \
--data_dir=${data_path} \
--batch_size=${batch_size} \
--use_gpu=False \
--test_samples=${test_samples} \
--ir_optim=False > ${log_path}/ptq_eval_int8_${model} 2>&1
print_info $? ptq_eval_int8_${model}
done
}
# Toggle for release branches, which have no ce_tests_dygraph_ptq case:
# set is_develop="False" on a release branch so that all_quant skips that case.
is_develop="True"
# Run the whole quantization suite (10 models in total), one case after another.
all_quant() {
    # The PTQ dygraph case only runs when is_develop is exactly "True"
    # (ce_tests_dygraph_ptq4 stays disabled).
    case "${is_develop}" in
        True) ce_tests_dygraph_ptq ;;
    esac

    # Remaining cases, executed in this exact order.
    local -a quant_cases=(
        demo_quant_quant_aware        # 2 models
        demo_quant_quant_embedding    # 1 model
        demo_quant_quant_post         # 4 strategies
        demo_dygraph_quant            # 2 models
        demo_quant_pact_quant_aware   # 1 model
        ce_tests_dygraph_qat          # 4 models (ce_tests_dygraph_qat4 stays disabled)
        demo_quant_quant_post_hpo
    )
    local quant_case
    for quant_case in "${quant_cases[@]}"; do
        "${quant_case}"
    done
}
# 3 prune
# Static-graph pruning demos, run from ${slim_dir}/demo/prune:
#   3.1 ratio pruning of MobileNetV1,
#   3.2 FPGM (geometry_median) pruning of MobileNetV2 and ResNet34, each
#       followed by eval.py on the saved model,
#   3.3 ratio pruning of ResNet50.
# Every active step writes its log to ${log_path}/<case> and hands its exit
# status to print_info. Globals read: slim_dir, cudaid1, log_path.
demo_prune(){
    cd ${slim_dir}/demo/prune || catchException demo_prune
    # 3.1 P0 prune
    if [ -d "models" ];then
        rm -rf models
    fi
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    python train.py --model "MobileNet" --pruned_ratio 0.31 --data "imagenet" \
        --pretrained_model ../pretrain/MobileNetV1_pretrained/ --num_epochs 1 >${log_path}/prune_v1_T 2>&1
    print_info $? prune_v1_T
    #3.2 prune_fpgm
    # slim_prune_fpgm_v1_T (disabled)
    # export CUDA_VISIBLE_DEVICES=${cudaid1}
    # python train.py \
    # --model="MobileNet" \
    # --pretrained_model="../pretrain/MobileNetV1_pretrained" \
    # --data="imagenet" \
    # --pruned_ratio=0.3125 \
    # --lr=0.1 \
    # --num_epochs=1 \
    # --test_period=1 \
    # --step_epochs 30 60 90 \
    # --l2_decay=3e-5 \
    # --lr_strategy="piecewise_decay" \
    # --criterion="geometry_median" \
    # --model_path="./fpgm_mobilenetv1_models" \
    # --save_inference True >${log_path}/slim_prune_fpgm_v1_T 2>&1
    # print_info $? slim_prune_fpgm_v1_T
    #slim_prune_fpgm_v2_T
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    #v2 -50%
    python train.py \
        --model="MobileNetV2" \
        --pretrained_model="../pretrain/MobileNetV2_pretrained" \
        --data="imagenet" \
        --pruned_ratio=0.325 \
        --lr=0.001 \
        --num_epochs=2 \
        --test_period=1 \
        --step_epochs 30 60 80 \
        --l2_decay=1e-4 \
        --lr_strategy="piecewise_decay" \
        --criterion="geometry_median" \
        --model_path="./output/fpgm_mobilenetv2_models" \
        --save_inference True >${log_path}/slim_prune_fpgm_v2_T 2>&1
    print_info $? slim_prune_fpgm_v2_T
    python eval.py --model "MobileNetV2" --data "imagenet" \
        --model_path "./output/fpgm_mobilenetv2_models/0" >${log_path}/slim_prune_fpgm_v2_eval 2>&1
    print_info $? slim_prune_fpgm_v2_eval
    # ResNet34 -50 (training disabled)
    # export CUDA_VISIBLE_DEVICES=${cudaid1}
    # python train.py \
    # --model="ResNet34" \
    # --pretrained_model="../pretrain/ResNet34_pretrained" \
    # --data="imagenet" \
    # --pruned_ratio=0.3125 \
    # --lr=0.001 \
    # --num_epochs=2 \
    # --test_period=1 \
    # --step_epochs 30 60 \
    # --l2_decay=1e-4 \
    # --lr_strategy="piecewise_decay" \
    # --criterion="geometry_median" \
    # --model_path="./output/fpgm_resnet34_50_models" \
    # --save_inference True >${log_path}/slim_prune_fpgm_resnet34_50_T 2>&1
    # The report call is disabled together with the training command above: with
    # the command commented out it would pass a stale $? and try to move/cat a
    # log file that was never written, printing a spurious SUCCESS.
    # print_info $? slim_prune_fpgm_resnet34_50_T
    # NOTE(review): this eval still targets ./output/fpgm_resnet34_50_models/0,
    # which this function no longer trains - confirm whether it should be
    # disabled as well.
    python eval.py --model "ResNet34" --data "imagenet" \
        --model_path "./output/fpgm_resnet34_50_models/0" >${log_path}/slim_prune_fpgm_resnet34_50_eval 2>&1
    print_info $? slim_prune_fpgm_resnet34_50_eval
    # ResNet34 -42 slim_prune_fpgm_resnet34_42_T
    cd ${slim_dir}/demo/prune
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    python train.py \
        --model="ResNet34" \
        --pretrained_model="../pretrain/ResNet34_pretrained" \
        --data="imagenet" \
        --pruned_ratio=0.25 \
        --num_epochs=2 \
        --test_period=1 \
        --lr_strategy="cosine_decay" \
        --criterion="geometry_median" \
        --model_path="./output/fpgm_resnet34_025_120_models" \
        --save_inference True >${log_path}/slim_prune_fpgm_resnet34_42_T 2>&1
    print_info $? slim_prune_fpgm_resnet34_42_T
    python eval.py --model "ResNet34" --data "imagenet" \
        --model_path "./output/fpgm_resnet34_025_120_models/0" >${log_path}/slim_prune_fpgm_resnet34_42_eval 2>&1
    print_info $? slim_prune_fpgm_resnet34_42_eval
    # 3.3 prune ResNet50
    export CUDA_VISIBLE_DEVICES=${cudaid1}
    # With release 2.1 the default batch size of 256 runs out of GPU memory,
    # so it is temporarily lowered to 128.
    python train.py --model ResNet50 --pruned_ratio 0.31 --data "imagenet" \
        --save_inference True --pretrained_model ../pretrain/ResNet50_pretrained \
        --num_epochs 1 --batch_size 128 >${log_path}/prune_ResNet50_T 2>&1
    print_info $? prune_ResNet50_T
}
# 3.4 dygraph_prune
#dy_prune_ResNet34_f42
# Dygraph FPGM pruning demos, run from ${slim_dir}/demo/dygraph/pruning:
# single-card ResNet34 training, resume from checkpoint, eval and export,
# followed by launches of MobileNetV1 and ResNet34 training through
# paddle.distributed.launch on the ${cudaid2} devices.
# Every active step writes its log to ${log_path}/<case> and hands its exit
# status to print_info. Globals read: slim_dir, cudaid1, cudaid2, log_path.
demo_dygraph_pruning(){
    cd ${slim_dir}/demo/dygraph/pruning || catchException demo_dygraph_pruning
    # -f/-n: replace an existing "data" link in place. A plain `ln -s SRC data`
    # on a re-run would follow the existing link and create a nested data/data
    # link inside the dataset directory.
    ln -sfn ${slim_dir}/demo/data data
    CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
        --use_gpu=True \
        --model="resnet34" \
        --data="imagenet" \
        --pruned_ratio=0.25 \
        --num_epochs=1 \
        --batch_size=128 \
        --lr_strategy="cosine_decay" \
        --criterion="fpgm" \
        --model_path="./fpgm_resnet34_025_120_models" >${log_path}/dy_prune_ResNet34_f42_gpu1 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu1
    # 2.3 Resume training: continue from a saved checkpoint via --checkpoint.
    CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
        --use_gpu=True \
        --model="resnet34" \
        --data="imagenet" \
        --pruned_ratio=0.25 \
        --num_epochs=2 \
        --batch_size=128 \
        --lr_strategy="cosine_decay" \
        --criterion="fpgm" \
        --model_path="./fpgm_resnet34_025_120_models" \
        --checkpoint="./fpgm_resnet34_025_120_models/0" >${log_path}/dy_prune_ResNet34_f42_gpu1_load 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu1_load
    # 2.4 Evaluation: run eval.py to measure the accuracy of the pruned and
    # retrained model on the test data.
    CUDA_VISIBLE_DEVICES=${cudaid1} python eval.py \
        --checkpoint=./fpgm_resnet34_025_120_models/1 \
        --model="resnet34" \
        --pruned_ratio=0.25 \
        --batch_size=128 >${log_path}/dy_prune_ResNet34_f42_gpu1_eval 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu1_eval
    # 2.5 Export: produce the model used for inference.
    CUDA_VISIBLE_DEVICES=${cudaid1} python export_model.py \
        --checkpoint=./fpgm_resnet34_025_120_models/final \
        --model="resnet34" \
        --pruned_ratio=0.25 \
        --output_path=./infer_final/resnet > ${log_path}/dy_prune_ResNet34_f42_gpu1_export 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu1_export
    #add dy_prune_fpgm_mobilenetv1_50_T
    # A space is required before the continuation backslash after "90";
    # without it the value is glued to the next option ("90--l2_decay=3e-5")
    # unless the following line happens to start with whitespace.
    CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
        --log_dir="fpgm_mobilenetv1_train_log" \
        train.py \
        --model="mobilenet_v1" \
        --data="imagenet" \
        --pruned_ratio=0.3125 \
        --lr=0.1 \
        --num_epochs=1 \
        --test_period=1 \
        --step_epochs 30 60 90 \
        --l2_decay=3e-5 \
        --lr_strategy="piecewise_decay" \
        --criterion="fpgm" \
        --model_path="./fpgm_mobilenetv1_models" > ${log_path}/dy_prune_fpgm_mobilenetv1_50_T 2>&1
    print_info $? dy_prune_fpgm_mobilenetv1_50_T
    #add dy_prune_fpgm_mobilenetv2_50_T (disabled)
    # CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
    # --log_dir="fpgm_mobilenetv2_train_log" \
    # train.py \
    # --model="mobilenet_v2" \
    # --data="imagenet" \
    # --pruned_ratio=0.325 \
    # --lr=0.001 \
    # --num_epochs=1 \
    # --test_period=1 \
    # --step_epochs 30 60 80 \
    # --l2_decay=1e-4 \
    # --lr_strategy="piecewise_decay" \
    # --criterion="fpgm" \
    # --model_path="./fpgm_mobilenetv2_models" > ${log_path}/dy_prune_fpgm_mobilenetv2_50_T 2>&1
    # print_info $? dy_prune_fpgm_mobilenetv2_50_T
    #add
    CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
        --log_dir="fpgm_resnet34_f_42_train_log" \
        train.py \
        --use_gpu=True \
        --model="resnet34" \
        --data="imagenet" \
        --pruned_ratio=0.25 \
        --batch_size=128 \
        --num_epochs=1 \
        --test_period=1 \
        --lr_strategy="cosine_decay" \
        --criterion="fpgm" \
        --model_path="./fpgm_resnet34_025_120_models" > ${log_path}/dy_prune_ResNet34_f42_gpu2 2>&1
    print_info $? dy_prune_ResNet34_f42_gpu2
}
# 3.5 st unstructured_prune
demo_unstructured_prune(){
    # Static-graph unstructured pruning demo. Runs, in order:
    #   1. threshold-mode training      (single card, ${cudaid1})
    #   2. evaluation of the threshold model
    #   3. ratio-mode training          (single card, ${cudaid1})
    #   4. evaluation of the MNIST model
    #   5. GMP-strategy training via paddle.distributed.launch (${cudaid2})
    # Every step logs stdout/stderr to ${log_path}/<case> and passes its exit
    # status to print_info.
    cd ${slim_dir}/demo/unstructured_prune || catchException demo_unstructured_prune

    # Options common to both single-card training runs. They are split into a
    # leading and a trailing group so the final command line keeps the same
    # argument order as when everything was spelled out inline.
    local lead_opts=(
        --batch_size 256
        --pretrained_model ../pretrain/MobileNetV1_pretrained
        --lr 0.05
    )
    local trail_opts=(
        --data imagenet
        --lr_strategy piecewise_decay
        --step_epochs 1 2 3
        --num_epochs 1
        --test_period 1
        --model_period 1
    )
    local case_tag

    # Note: batch_size is the total batch size summed over all cards, i.e. the
    # batch size on a single card is 256.
    # Stays in effect for every command below until it is switched to ${cudaid2}.
    export CUDA_VISIBLE_DEVICES=${cudaid1}

    ## sparsity: -30%, accuracy: 70%/89%
    case_tag=st_unstructured_prune_threshold_T
    python train.py "${lead_opts[@]}" \
        --pruning_mode threshold --threshold 0.01 \
        "${trail_opts[@]}" \
        --model_path st_unstructured_models >${log_path}/${case_tag} 2>&1
    print_info $? ${case_tag}

    # eval
    case_tag=st_unstructured_prune_threshold_eval
    python evaluate.py --pruned_model=st_unstructured_models --data="imagenet" >${log_path}/${case_tag} 2>&1
    print_info $? ${case_tag}

    ## sparsity: -55%, accuracy: 67%+/87%+
    case_tag=st_unstructured_prune_ratio_T
    python train.py "${lead_opts[@]}" \
        --pruning_mode ratio --ratio 0.55 \
        "${trail_opts[@]}" \
        --model_path st_ratio_models >${log_path}/${case_tag} 2>&1
    print_info $? ${case_tag}

    # MNIST dataset (training is disabled)
    # python train.py \
    # --batch_size 256 \
    # --pretrained_model ../pretrain/MobileNetV1_pretrained \
    # --lr 0.05 \
    # --pruning_mode threshold \
    # --threshold 0.01 \
    # --data mnist \
    # --lr_strategy piecewise_decay \
    # --step_epochs 1 2 3 \
    # --num_epochs 1 \
    # --test_period 1 \
    # --model_period 1 \
    # --model_path st_unstructured_models_mnist >${log_path}/st_unstructured_prune_threshold_mnist_T 2>&1
    # print_info $? st_unstructured_prune_threshold_mnist_T

    # eval
    # NOTE(review): the MNIST training above is commented out, yet this step
    # still evaluates st_unstructured_models_mnist -- confirm that directory is
    # produced somewhere else, otherwise this case cannot succeed.
    case_tag=st_unstructured_prune_threshold_mnist_eval
    python evaluate.py --pruned_model=st_unstructured_models_mnist --data="mnist" >${log_path}/${case_tag} 2>&1
    print_info $? ${case_tag}

    # GMP pruning strategy, launched through paddle.distributed.launch.
    export CUDA_VISIBLE_DEVICES=${cudaid2}
    local gmp_opts=(
        --batch_size 64
        --data imagenet
        --pruning_mode ratio
        --ratio 0.75
        --lr 0.005
        --model MobileNet
        --num_epochs 1
        --test_period 5
        --model_period 10
        --pretrained_model ../pretrain/MobileNetV1_pretrained
        --model_path "./models"
        --step_epochs 71 88
        --initial_ratio 0.15
        --pruning_steps 5
        --stable_epochs 0
        --pruning_epochs 54
        --tunning_epochs 54
        --last_epoch -1
        --prune_params_type conv1x1_only
        --pruning_strategy gmp
    )
    case_tag=st_unstructured_prune_ratio_gmp
    python -m paddle.distributed.launch --log_dir="st_unstructured_prune_gmp_log" \
        train.py "${gmp_opts[@]}" > ${log_path}/${case_tag} 2>&1
    print_info $? ${case_tag}
}
demo_dygraph_unstructured_pruning(){
    # Dynamic-graph unstructured pruning demo. Runs, in order:
    #   1. ratio-mode training
    #   2. threshold-mode training
    #   3. evaluation of dy_threshold_models/model.pdparams
    #   4. threshold-mode training that loads dy_threshold_models/model.pdparams
    #      through --pretrained_model
    #   5. GMP-strategy training
    # All steps use ${cudaid2}; training goes through paddle.distributed.launch.
    # Every step logs stdout/stderr to ${log_path}/<case> and passes its exit
    # status to print_info.

    # dy_threshold
    cd ${slim_dir}/demo/dygraph/unstructured_pruning || catchException demo_dygraph_unstructured_pruning
    export CUDA_VISIBLE_DEVICES=${cudaid2}

    ## sparsity: -55%, accuracy: 67%+/87%+
    python -m paddle.distributed.launch \
        --log_dir train_dy_ratio_log train.py \
        --data imagenet \
        --lr 0.05 \
        --pruning_mode ratio \
        --ratio 0.55 \
        --batch_size 256 \
        --lr_strategy piecewise_decay \
        --step_epochs 1 2 3 \
        --num_epochs 1 \
        --test_period 1 \
        --model_period 1 \
        --model_path dy_ratio_models >${log_path}/dy_prune_ratio_T 2>&1
    print_info $? dy_prune_ratio_T

    ## sparsity: -30%, accuracy: 70%/89%
    export CUDA_VISIBLE_DEVICES=${cudaid2}
    python -m paddle.distributed.launch \
        --log_dir train_dy_threshold_log train.py \
        --data imagenet \
        --lr 0.05 \
        --pruning_mode threshold \
        --threshold 0.01 \
        --batch_size 256 \
        --lr_strategy piecewise_decay \
        --step_epochs 1 2 3 \
        --num_epochs 1 \
        --test_period 1 \
        --model_period 1 \
        --model_path dy_threshold_models >${log_path}/dy_threshold_prune_T 2>&1
    print_info $? dy_threshold_prune_T

    # eval
    python evaluate.py --pruned_model dy_threshold_models/model.pdparams \
        --data imagenet >${log_path}/dy_threshold_prune_eval 2>&1
    print_info $? dy_threshold_prune_eval

    # load
    python -m paddle.distributed.launch \
        --log_dir train_dy_threshold_load_log train.py \
        --data imagenet \
        --lr 0.05 \
        --pruning_mode threshold \
        --threshold 0.01 \
        --batch_size 256 \
        --lr_strategy piecewise_decay \
        --step_epochs 1 2 3 \
        --num_epochs 3 \
        --test_period 1 \
        --model_period 1 \
        --model_path dy_threshold_models_new \
        --pretrained_model dy_threshold_models/model.pdparams \
        --last_epoch 1 > ${log_path}/dy_threshold_prune_T_load 2>&1
    print_info $? dy_threshold_prune_T_load

    # cifar10 (disabled)
    # python train.py --data cifar10 --lr 0.05 \
    # --pruning_mode threshold \
    # --threshold 0.01 \
    # --model_period 1 \
    # --num_epochs 2 >${log_path}/dy_threshold_prune_cifar10_T 2>&1
    # print_info $? dy_threshold_prune_cifar10_T

    # GMP pruning strategy.
    export CUDA_VISIBLE_DEVICES=${cudaid2}
    # The log path must follow a ">" redirect. Without it the path is handed to
    # train.py as an extra positional argument, nothing is written to the log
    # file, and print_info then fails to rename a file that does not exist.
    python -m paddle.distributed.launch \
        --log_dir="dy_unstructured_prune_gmp_log" \
        train.py \
        --batch_size 64 \
        --data imagenet \
        --pruning_mode ratio \
        --ratio 0.75 \
        --lr 0.005 \
        --num_epochs 1 \
        --test_period 5 \
        --model_period 10 \
        --model_path "./models" \
        --step_epochs 71 88 \
        --initial_ratio 0.15 \
        --pruning_steps 100 \
        --stable_epochs 0 \
        --pruning_epochs 54 \
        --tunning_epochs 54 \
        --last_epoch -1 \
        --pruning_strategy gmp \
        --skip_params_type exclude_conv1x1 > ${log_path}/dy_unstructured_prune_ratio_gmp 2>&1
    print_info $? dy_unstructured_prune_ratio_gmp
}
##################
all_prune(){
    # Run every pruning demo group in sequence.
    # Counts carried over from the original notes: 7 models for the group as a
    # whole, 4 models for demo_unstructured_prune.
    local _prune_stage
    for _prune_stage in \
        demo_prune \
        demo_dygraph_pruning \
        demo_unstructured_prune \
        demo_dygraph_unstructured_pruning
    do
        ${_prune_stage}
    done
}
#4 nas
demo_nas(){
    # 4.1 sa_nas_mobilenetv2
    # Single-card run of sa_nas_mobilenetv2.py with one search step; the log
    # goes to ${log_path}/${model} and the exit status to print_info.
    cd ${slim_dir}/demo/nas || catchException demo_nas

    model=demo_nas_sa_nas_v2_T_1card
    CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py \
        --search_steps 1 \
        --port 8881 >${log_path}/${model} 2>&1
    print_info $? ${model}
}
demo_nas4(){
    # Extended NAS demo set: sa_nas, block_sa_nas and rl_nas, each run on a
    # single card (${cudaid1}) with one search step and its own port.
    # Each run logs to ${log_path}/${model} and reports through print_info.
    cd ${slim_dir}/demo/nas || catchException demo_nas4

    # sa_nas_mobilenetv2
    model=sa_nas_v2_T_1card
    CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py \
        --search_steps 1 \
        --retain_epoch 1 \
        --port 8881 >${log_path}/${model} 2>&1
    print_info $? ${model}

    # 4.2 block_sa_nas_mobilenetv2
    model=block_sa_nas_v2_T_1card
    CUDA_VISIBLE_DEVICES=${cudaid1} python block_sa_nas_mobilenetv2.py \
        --search_steps 1 \
        --port 8883 >${log_path}/${model} 2>&1
    print_info $? ${model}

    # 4.3 rl_nas
    model=rl_nas_v2_T_1card
    CUDA_VISIBLE_DEVICES=${cudaid1} python rl_nas_mobilenetv2.py \
        --search_steps 1 \
        --port 8885 >${log_path}/${model} 2>&1
    print_info $? ${model}

    # 4.4 parl_nas (disabled)
    #model=parl_nas_v2_T_1card
    #CUDA_VISIBLE_DEVICES=${cudaid1} python parl_nas_mobilenetv2.py \
    #--search_steps 1 --port 8887 >${log_path}/${model} 2>&1
    #print_info $? ${model}
}
all_nas(){
    # Entry point for the NAS case group.
    # NOTE(review): the original note says "3 models", but only demo_nas is
    # invoked here (demo_nas4 is not) -- confirm which is intended.
    demo_nas
}
# 5 darts
# search 1card # DARTS first-order approximation search method
demo_darts(){
    # DARTS demo: one single-card search run followed by one single-card
    # training run with the PC_DARTS architecture. Each run logs to
    # ${log_path}/${model} and reports its exit status through print_info.
    cd ${slim_dir}/demo/darts || catchException demo_darts

    # search
    model=darts1_search_1card
    CUDA_VISIBLE_DEVICES=${cudaid1} python search.py \
        --epochs 1 \
        --use_multiprocess False \
        --batch_size 32 >${log_path}/${model} 2>&1
    print_info $? ${model}

    # train
    model=pcdarts_train_1card
    CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
        --arch='PC_DARTS' \
        --epochs 1 \
        --use_multiprocess False \
        --batch_size 32 >${log_path}/${model} 2>&1
    print_info $? ${model}

    # visualization (disabled)
    #pip install graphviz
    #model=slim_darts_visualize_pcdarts
    #python visualize.py PC_DARTS > ${log_path}/${model} 2>&1
    #print_info $? ${model}
}
slimfacenet(){
    # SlimFaceNet demo: links the CASIA and lfw datasets into the demo
    # directory, then runs train_eval.py three times with --action set to
    # train, quant and test. Each run logs to ${log_path}/${model} and reports
    # its exit status through print_info.
    cd ${slim_dir}/demo/slimfacenet || catchException slimfacenet
    ln -s ${data_path}/slim/slimfacenet/CASIA CASIA
    ln -s ${data_path}/slim/slimfacenet/lfw lfw

    # train
    model=slim_slimfacenet_B75_train
    CUDA_VISIBLE_DEVICES=${cudaid1} python -u train_eval.py \
        --train_data_dir=./CASIA/ \
        --test_data_dir=./lfw/ \
        --action train \
        --model=SlimFaceNet_B_x0_75 \
        --start_epoch 0 \
        --total_epoch 1 >${log_path}/${model} 2>&1
    print_info $? ${model}

    # quant
    model=slim_slimfacenet_B75_quan
    CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
        --action quant \
        --train_data_dir=./CASIA/ \
        --test_data_dir=./lfw/ >${log_path}/${model} 2>&1
    print_info $? ${model}

    # test
    model=slim_slimfacenet_B75_eval
    CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
        --action test \
        --train_data_dir=./CASIA/ \
        --test_data_dir=./lfw/ >${log_path}/${model} 2>&1
    print_info $? ${model}
}
all_darts(){
    # Entry point for the DARTS case group (2 models per the original note).
    # slimfacenet is intentionally not run here; the original note marks it
    # as something that needs to be removed.
    demo_darts
    #slimfacenet
}
demo_latency(){
    # Latency predictor demo: runs latency_predictor.py for mobilenet_v1 and
    # mobilenet_v2, each with fp32 and then int8, in that order. Each run logs
    # to ${log_path}/${model} and reports its exit status through print_info.
    cd ${slim_dir}/demo/analysis || catchException demo_latency

    local net precision
    for net in mobilenet_v1 mobilenet_v2; do
        for precision in fp32 int8; do
            model=latency_${net}_${precision}
            python latency_predictor.py \
                --model ${net} \
                --data_type ${precision} >${log_path}/${model} 2>&1
            print_info $? ${model}
        done
    done
}
all_latency(){
    # Entry point for the latency case group; it only wraps demo_latency.
    demo_latency
}
####################################
# Case groups to run, in order. Each entry is the name of a shell function.
export all_case_list=(all_distillation all_quant all_prune all_nas )
export all_case_time=0
# Estimated run time in minutes per case group.
# The associative array must be declared under the same name it is assigned
# to. Otherwise bash treats it as an indexed array, every string key evaluates
# to index 0, and only the last value survives.
declare -A all_case_dic
all_case_dic=(["all_distillation"]=5 ["all_quant"]=15 ["all_prune"]=1 ["all_nas"]=30 ["all_darts"]=30 ['unstructured_prune']=15 ['dy_qat1']=1)
# Total of every entry in the time table (not only the cases in all_case_list).
for key in "${!all_case_dic[@]}";do
    all_case_time=$((all_case_time + ${all_case_dic[$key]}))
done
set -e
echo -e "\033[35m ---- P0case_list length: ${#all_case_list[*]}, cases: ${all_case_list[*]} \033[0m"
echo -e "\033[35m ---- P0case_time: $all_case_time min \033[0m"
set +e
####################################
echo -e "\033[35m ---- start run case \033[0m"
case_num=1
for model in ${all_case_list[*]};do
    # Look the estimate up in the time table, not in the case list.
    echo -e "\033[35m ---- running P0case $case_num/${#all_case_list[*]}: ${model} , task time: ${all_case_dic[${model}]} min \033[0m"
    ${model}
    case_num=$((case_num + 1))
done
echo -e "\033[35m ---- end run case \033[0m"
# The job fails when any log in the log directory was renamed to FAIL_*.
cd ${slim_dir}/logs
# stderr is silenced so a run without failures does not print an ls error.
FF=$(ls *FAIL* 2>/dev/null | wc -l)
if [ "${FF}" -gt "0" ];then
    exit 1
else
    exit 0
fi
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册