diff --git a/demo/nas/sa_nas_mobilenetv2.py b/demo/nas/sa_nas_mobilenetv2.py index ce918f77d6e13d098fc13ae7cb574a2f51b766e8..6eb557d16ded2f09f21c5580ba6e9756311691be 100644 --- a/demo/nas/sa_nas_mobilenetv2.py +++ b/demo/nas/sa_nas_mobilenetv2.py @@ -1,1028 +1,315 @@ -#!/usr/bin/env bash -################## -#bash slim_ci_demo_all_case.sh $5 $6; - -print_info(){ -if [ $1 -ne 0 ];then - mv ${log_path}/$2 ${log_path}/FAIL_$2.log - echo -e "\033[31m ${log_path}/FAIL_$2 \033[0m" - echo "fail log as follow" - cat ${log_path}/FAIL_$2.log -else - mv ${log_path}/$2 ${log_path}/SUCCESS_$2.log - echo -e "\033[32m ${log_path}/SUCCESS_$2 \033[0m" - cat ${log_path}/SUCCESS_$2.log -fi -} - -catchException() { - echo $1 failed due to exception >> FAIL_Exception.log -} - -cudaid1=$1; -cudaid2=$2; -echo "cudaid1,cudaid2", ${cudaid1}, ${cudaid2} -export CUDA_VISIBLE_DEVICES=${cudaid1} -#分布式log输出方式 -export PADDLE_LOG_LEVEL=debug - -export FLAGS_fraction_of_gpu_memory_to_use=0.98 -# data PaddleSlim/demo/data/ILSVRC2012 -cd ${slim_dir}/demo -if [ -d "data" ];then - rm -rf data -fi -wget -q https://sys-p0.bj.bcebos.com/slim_ci/ILSVRC2012_data_demo.tar.gz --no-check-certificate -tar xf ILSVRC2012_data_demo.tar.gz -mv ILSVRC2012_data_demo data -# download pretrain model -root_url="http://paddle-imagenet-models-name.bj.bcebos.com" -pre_models="MobileNetV1 MobileNetV2 MobileNetV3_large_x1_0_ssld ResNet101_vd MobileNetV2 ResNet34 ResNet50 ResNet50_vd" -if [ -d "pretrain" ];then - rm -rf pretrain -fi -mkdir pretrain && cd pretrain -for model in ${pre_models} -do - if [ ! -f ${model} ]; then - wget -q ${root_url}/${model}_pretrained.tar - tar xf ${model}_pretrained.tar - fi -done - -# 1 dist -demo_distillation_01(){ -cd ${slim_dir}/demo/distillation || catchException demo_distillation -if [ -d "output" ];then - rm -rf output -fi -export CUDA_VISIBLE_DEVICES=${cudaid1} -python distill.py --num_epochs 1 --save_inference True >${log_path}/demo_distillation_ResNet50_vd_T 2>&1 -print_info $? 
demo_distillation_ResNet50_vd_T - -} - -demo_distillation_02(){ -cd ${slim_dir}/demo/distillation || catchException demo_distillation -if [ -d "output" ];then - rm -rf output -fi - -export CUDA_VISIBLE_DEVICES=${cudaid1} -python distill.py --num_epochs 1 --batch_size 64 --save_inference True \ ---model ResNet50 --teacher_model ResNet101_vd \ ---teacher_pretrained_model ../pretrain/ResNet101_vd_pretrained >${log_path}/demo_distillation_ResNet101_vd_ResNet50_T 2>&1 -print_info $? demo_distillation_ResNet101_vd_ResNet50_T - -python distill.py --num_epochs 1 --batch_size 64 --save_inference True \ ---model MobileNetV2_x0_25 --teacher_model MobileNetV2 \ ---teacher_pretrained_model ../pretrain/MobileNetV2_pretrained >${log_path}/demo_distillation_MobileNetV2_MobileNetV2_x0_25_T 2>&1 -print_info $? demo_distillation_MobileNetV2_MobileNetV2_x0_25_T -} - -demo_deep_mutual_learning(){ -cd ${slim_dir}/demo/deep_mutual_learning || catchException demo_deep_mutual_learning -export CUDA_VISIBLE_DEVICES=${cudaid1} -model=dml_mv1_mv1_gpu1 -CUDA_VISIBLE_DEVICES=${cudaid1} -python dml_train.py --epochs 1 >${log_path}/${model} 2>&1 -print_info $? ${model} -model=dml_mv1_res50_gpu1 -CUDA_VISIBLE_DEVICES=${cudaid1} -python dml_train.py --models='mobilenet-resnet50' --batch_size 128 --epochs 1 >${log_path}/${model} 2>&1 -print_info $? ${model} -} - -all_distillation(){ # 大数据 5个模型 - demo_distillation_01 # 3 - #demo_distillation_02 - #demo_deep_mutual_learning # 2 -} -# 2.1 quant/quant_aware 使用小数据集即可 -demo_quant_quant_aware(){ -cd ${slim_dir}/demo/quant/quant_aware || catchException demo_quant_quant_aware -if [ -d "output" ];then - rm -rf output -fi -export CUDA_VISIBLE_DEVICES=${cudaid1} -# 2.1版本时默认BS=256会报显存不足,故暂时修改成128 -python train.py --model MobileNet --pretrained_model ../../pretrain/MobileNetV1_pretrained \ ---checkpoint_dir ./output/mobilenetv1 --num_epochs 1 --batch_size 128 >${log_path}/demo_quant_quant_aware_v1 2>&1 -print_info $? 
demo_quant_quant_aware_v1 - -export CUDA_VISIBLE_DEVICES=${cudaid1} -python train.py --model ResNet34 \ ---pretrained_model ../../pretrain/ResNet34_pretrained \ ---checkpoint_dir ./output/ResNet34 --num_epochs 1 >${log_path}/demo_quant_quant_aware_ResNet34_T 2>&1 -print_info $? demo_quant_quant_aware_ResNet34_T -} -# 2.2 quant/quant_embedding -demo_quant_quant_embedding(){ -cd ${slim_dir}/demo/quant/quant_embedding || catchException demo_quant_quant_embedding -export CUDA_VISIBLE_DEVICES=${cudaid1} -# 先使用word2vec的demo数据进行一轮训练,比较量化前infer结果同量化后infer结果different -if [ -d "data" ];then - rm -rf data -fi -wget -q https://sys-p0.bj.bcebos.com/slim_ci/word_2evc_demo_data.tar.gz --no-check-certificate -tar xf word_2evc_demo_data.tar.gz -mv word_2evc_demo_data data -if [ -d "v1_cpu5_b100_lr1dir" ];then - rm -rf v1_cpu5_b100_lr1dir -fi -OPENBLAS_NUM_THREADS=1 CPU_NUM=5 python train.py --train_data_dir data/convert_text8 \ ---dict_path data/test_build_dict --num_passes 1 --batch_size 100 --model_output_dir v1_cpu5_b100_lr1dir \ - --base_lr 1.0 --print_batch 1000 --with_speed --is_sparse >${log_path}/quant_em_word2vec_T 2>&1 -print_info $? quant_em_word2vec_T -# 量化前infer -python infer.py --infer_epoch --test_dir data/test_mid_dir \ ---dict_path data/test_build_dict_word_to_id_ \ ---batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ \ ---start_index 0 --last_index 0 >${log_path}/quant_em_infer1 2>&1 -print_info $? quant_em_infer1 -# 量化后infer -python infer.py --infer_epoch --test_dir data/test_mid_dir \ ---dict_path data/test_build_dict_word_to_id_ \ ---batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/ --start_index 0 \ ---last_index 0 --emb_quant True >${log_path}/quant_em_infer2 2>&1 -print_info $? 
quant_em_infer2 -} -# 2.3 quan_post # 小数据集 -demo_quant_quant_post(){ -# 20210425 新增4种离线量化方法 -cd ${slim_dir}/demo/quant/quant_post || catchException demo_quant_quant_post -export CUDA_VISIBLE_DEVICES=${cudaid1} -# 1 导出模型 -python export_model.py --model "MobileNet" --pretrained_model ../../pretrain/MobileNetV1_pretrained \ ---data imagenet >${log_path}/st_quant_post_v1_export 2>&1 -print_info $? st_quant_post_v1_export -# 量化前eval -python eval.py --model_path ./inference_model/MobileNet --model_name model \ ---params_name weights >${log_path}/st_quant_post_v1_eval1 2>&1 -print_info $? st_quant_post_v1_eval1 - -# 3 离线量化 -# 4 量化后eval -for algo in hist avg mse -do -## 不带bc 离线量化 -echo "quant_post train no bc " ${algo} -python quant_post.py --model_path ./inference_model/MobileNet \ ---save_path ./quant_model/${algo}/MobileNet \ ---model_filename model --params_filename weights --algo ${algo} >${log_path}/st_quant_post_v1_T_${algo} 2>&1 -print_info $? st_quant_post_v1_T_${algo} -# 量化后eval -echo "quant_post eval no bc " ${algo} -python eval.py --model_path ./quant_model/${algo}/MobileNet --model_name __model__ \ ---params_name __params__ > ${log_path}/st_quant_post_${algo}_eval2 2>&1 -print_info $? st_quant_post_${algo}_eval2 - -# 带bc参数的 离线量化 -echo "quant_post train bc " ${algo} -python quant_post.py --model_path ./inference_model/MobileNet \ ---save_path ./quant_model/${algo}_bc/MobileNet \ ---model_filename model --params_filename weights \ ---algo ${algo} --bias_correction True >${log_path}/st_quant_post_T_${algo}_bc 2>&1 -print_info $? st_quant_post_T_${algo}_bc - -# 量化后eval -echo "quant_post eval bc " ${algo} -python eval.py --model_path ./quant_model/${algo}_bc/MobileNet --model_name __model__ \ ---params_name __params__ > ${log_path}/st_quant_post_${algo}_bc_eval2 2>&1 -print_info $? 
st_quant_post_${algo}_bc_eval2 - -done -} - -# 2.3 quant_post_hpo # 小数据集 -demo_quant_quant_post_hpo(){ - -cd ${slim_dir}/demo/quant/quant_post_hpo || catchException demo_quant_quant_post_hpo -export CUDA_VISIBLE_DEVICES=${cudaid1} -# 1.导出模型 -python ../quant_post/export_model.py \ ---model "MobileNet" \ ---pretrained_model ../../pretrain/MobileNetV1_pretrained \ ---data imagenet > ${log_path}/st_quant_post__hpo_v1_export 2>&1 -print_info $? st_quant_post__hpo_v1_export -# 2. quant_post_hpo 设置max_model_quant_count=2 -python quant_post_hpo.py \ ---use_gpu=True \ ---model_path="./inference_model/MobileNet/" \ ---save_path="./inference_model/MobileNet_quant/" \ ---model_filename="model" \ ---params_filename="weights" \ ---max_model_quant_count=2 > ${log_path}/st_quant_post_hpo 2>&1 -print_info $? st_quant_post_hpo -# 3. 量化后eval -python ../quant_post/eval.py \ ---model_path ./inference_model/MobileNet_quant \ ---model_name __model__ \ ---params_name __params__ > ${log_path}/st_quant_post_hpo_eval 2>&1 -print_info $? st_quant_post_hpo_eval - -} - -#2.4 -demo_quant_pact_quant_aware(){ -cd ${slim_dir}/demo/quant/pact_quant_aware || catchException demo_quant_pact_quant_aware -export CUDA_VISIBLE_DEVICES=${cudaid1} -# 普通量化,使用小数据集即可 -# 2.1版本时默认BS=128 会报显存不足,故暂时修改成64 -python train.py --model MobileNetV3_large_x1_0 \ ---pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ ---num_epochs 1 --lr 0.0001 --use_pact False --batch_size 128 >${log_path}/demo_quant_pact_quant_aware_v3_nopact 2>&1 -print_info $? demo_quant_pact_quant_aware_v3_nopact -python train.py --model MobileNetV3_large_x1_0 \ ---pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ ---num_epochs 1 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \ ---step_epochs 2 --l2_decay 1e-5 >${log_path}/demo_quant_pact_quant_aware_v3 2>&1 -print_info $? 
demo_quant_pact_quant_aware_v3 -# load -python train.py --model MobileNetV3_large_x1_0 \ ---pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ ---num_epochs 2 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \ ---step_epochs 20 --l2_decay 1e-5 \ ---checkpoint_dir ./output/MobileNetV3_large_x1_0/0 \ ---checkpoint_epoch 0 >${log_path}/demo_quant_pact_quant_aware_v3_load 2>&1 -print_info $? demo_quant_pact_quant_aware_v3_load -} - -# 2.5 -demo_dygraph_quant(){ -cd ${slim_dir}/demo/dygraph/quant || catchException demo_dygraph_quant -CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --model='mobilenet_v1' \ ---pretrained_model '../../pretrain/MobileNetV1_pretrained' \ ---num_epochs 1 \ ---batch_size 128 \ -> ${log_path}/dy_quant_v1_gpu1 2>&1 -print_info $? dy_quant_v1_gpu1 -# dy_pact_v3 -CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --lr=0.001 \ ---batch_size 128 \ ---use_pact=True --num_epochs=1 --l2_decay=2e-5 --ls_epsilon=0.1 \ ---pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ ---num_epochs 1 > ${log_path}/dy_pact_quant_v3_gpu1 2>&1 -print_info $? dy_pact_quant_v3_gpu1 -# 多卡训练,以0到3号卡为例 -CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \ -train.py --lr=0.001 \ ---pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \ ---use_pact=True --num_epochs=1 \ ---l2_decay=2e-5 \ ---ls_epsilon=0.1 \ ---batch_size=128 \ ---model_save_dir output > ${log_path}/dy_pact_quant_v3_gpu4 2>&1 -print_info $? 
dy_pact_quant_v3_gpu4 -} -# 2.6 -ce_tests_dygraph_qat(){ -cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat -ln -s ${slim_dir}/demo/data/ILSVRC2012 -test_samples=1000 # if set as -1, use all test samples -data_path='./ILSVRC2012/' -batch_size=16 -epoch=1 -lr=0.0001 -num_workers=1 -output_dir=$PWD/output_models -for model in mobilenet_v1 -do -# if [ $1 == nopact ];then - # 1 quant train - echo "------1 nopact train--------", ${model} - export CUDA_VISIBLE_DEVICES=${cudaid1} - python ./src/qat.py \ - --arch=${model} \ - --data=${data_path} \ - --epoch=${epoch} \ - --batch_size=32 \ - --num_workers=${num_workers} \ - --lr=${lr} \ - --output_dir=${output_dir} \ - --enable_quant > qat_${model}_gpu1_nw1 2>&1 - # 2 eval before save quant - echo "--------2 eval before save quant -------------", ${model} - python ./src/eval.py \ - --model_path=./output_models/quant_dygraph/${model} \ - --data_dir=${data_path} \ - --test_samples=${test_samples} \ - --batch_size=${batch_size} > eval_before_save_${model} 2>&1 - # 3 CPU上部署量化模型,需要使用`test/save_quant_model.py`脚本进行模型转换。 - echo "--------3 save_nopact_quant_model-------------", ${model} - python src/save_quant_model.py \ - --load_model_path output_models/quant_dygraph/${model} \ - --save_model_path int8_models/${model} > save_quant_${model} 2>&1 - # 4 - echo "--------4 CPU eval after save nopact quant -------------", ${model} - export CUDA_VISIBLE_DEVICES= - python ./src/eval.py \ - --model_path=./int8_models/${model} \ - --data_dir=${data_path} \ - --test_samples=${test_samples} \ - --batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1 -# elif [ $1 == pact ];then - # 1 pact quant train - echo "------1 pact train--------", ${model} - export CUDA_VISIBLE_DEVICES=${cudaid1} - python ./src/qat.py \ - --arch=${model} \ - --data=${data_path} \ - --epoch=${epoch} \ - --batch_size=32 \ - --num_workers=${num_workers} \ - --lr=${lr} \ - --output_dir=$PWD/output_models_pact/ \ - --enable_quant \ - --use_pact 
> pact_qat_${model}_gpu1_nw1 2>&1 - # 2 eval before save quant - echo "--------2 eval before save pact quant -------------", ${model} - python ./src/eval.py \ - --model_path=./output_models_pact/quant_dygraph/${model} \ - --data_dir=${data_path} \ - --test_samples=${test_samples} \ - --batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1 - echo "--------3 save pact quant -------------", ${model} - python src/save_quant_model.py \ - --load_model_path output_models_pact/quant_dygraph/${model} \ - --save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1 - echo "--------4 CPU eval after save pact quant -------------", ${model} - python ./src/eval.py \ - --model_path=./int8_models_pact/${model} \ - --data_dir=${data_path} \ - --test_samples=${test_samples} \ - --batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1 -# fi - -done -} - -ce_tests_dygraph_qat(){ -cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat4 -ln -s ${slim_dir}/demo/data/ILSVRC2012 -test_samples=1000 # if set as -1, use all test samples -data_path='./ILSVRC2012/' -batch_size=16 -epoch=1 -lr=0.0001 -num_workers=1 -output_dir=$PWD/output_models -for model in mobilenet_v1 -#for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16 -do - -# if [ $1 == nopact ];then - # 1 quant train - echo "------1 nopact train--------", ${model} - export CUDA_VISIBLE_DEVICES=${cudaid1} - python ./src/qat.py \ - --arch=${model} \ - --data=${data_path} \ - --epoch=${epoch} \ - --batch_size=32 \ - --num_workers=${num_workers} \ - --lr=${lr} \ - --output_dir=${output_dir} \ - --enable_quant > qat_${model}_gpu1_nw1 2>&1 - # 2 eval before save quant - echo "--------2 eval before save quant -------------", ${model} - python ./src/eval.py \ - --model_path=./output_models/quant_dygraph/${model} \ - --data_dir=${data_path} \ - --test_samples=${test_samples} \ - --batch_size=${batch_size} > eval_before_save_${model} 2>&1 - # 3 
CPU上部署量化模型,需要使用`test/save_quant_model.py`脚本进行模型转换。 - echo "--------3 save_nopact_quant_model-------------", ${model} - python src/save_quant_model.py \ - --load_model_path output_models/quant_dygraph/${model} \ - --save_model_path int8_models/${model} > save_quant_${model} 2>&1 - # 4 - echo "--------4 CPU eval after save nopact quant -------------", ${model} - export CUDA_VISIBLE_DEVICES= - python ./src/eval.py \ - --model_path=./int8_models/${model} \ - --data_dir=${data_path} \ - --test_samples=${test_samples} \ - --batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1 -# elif [ $1 == pact ];then - # 1 pact quant train - echo "------1 pact train--------", ${model} - export CUDA_VISIBLE_DEVICES=${cudaid1} - python ./src/qat.py \ - --arch=${model} \ - --data=${data_path} \ - --epoch=${epoch} \ - --batch_size=32 \ - --num_workers=${num_workers} \ - --lr=${lr} \ - --output_dir=$PWD/output_models_pact/ \ - --enable_quant \ - --use_pact > pact_qat_${model}_gpu1_nw1 2>&1 - # 2 eval before save quant - echo "--------2 eval before save pact quant -------------", ${model} - python ./src/eval.py \ - --model_path=./output_models_pact/quant_dygraph/${model} \ - --data_dir=${data_path} \ - --test_samples=${test_samples} \ - --batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1 - echo "--------3 save pact quant -------------", ${model} - python src/save_quant_model.py \ - --load_model_path output_models_pact/quant_dygraph/${model} \ - --save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1 - echo "--------4 CPU eval after save pact quant -------------", ${model} - python ./src/eval.py \ - --model_path=./int8_models_pact/${model} \ - --data_dir=${data_path} \ - --test_samples=${test_samples} \ - --batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1 -# fi - -done -} - -ce_tests_dygraph_ptq(){ -cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_ptq4 -ln -s ${slim_dir}/demo/data/ILSVRC2012 -test_samples=1000 
# if set as -1, use all test samples -data_path='./ILSVRC2012/' -batch_size=32 -epoch=1 -output_dir="./output_ptq" -quant_batch_num=10 -quant_batch_size=10 -for model in mobilenet_v1 -#for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16 - -do - echo "--------quantize model: ${model}-------------" - export CUDA_VISIBLE_DEVICES=${cudaid1} - # save ptq quant model - python ./src/ptq.py \ - --data=${data_path} \ - --arch=${model} \ - --quant_batch_num=${quant_batch_num} \ - --quant_batch_size=${quant_batch_size} \ - --output_dir=${output_dir} > ${log_path}/ptq_${model} 2>&1 - print_info $? ptq_${model} - - echo "-------- eval fp32_infer model -------------", ${model} - python ./src/test.py \ - --model_path=${output_dir}/${model}/fp32_infer \ - --data_dir=${data_path} \ - --batch_size=${batch_size} \ - --use_gpu=True \ - --test_samples=${test_samples} \ - --ir_optim=False > ${log_path}/ptq_eval_fp32_${model} 2>&1 - print_info $? ptq_eval_fp32_${model} - - echo "-------- eval int8_infer model -------------", ${model} - python ./src/test.py \ - --model_path=${output_dir}/${model}/int8_infer \ - --data_dir=${data_path} \ - --batch_size=${batch_size} \ - --use_gpu=False \ - --test_samples=${test_samples} \ - --ir_optim=False > ${log_path}/ptq_eval_int8_${model} 2>&1 - print_info $? 
ptq_eval_int8_${model} - -done -} - -#用于更新release分支下无ce_tests_dygraph_ptq case;release分支设置is_develop="False" -is_develop="True" - -all_quant(){ # 10个模型 - if [ "${is_develop}" == "True" ];then - #ce_tests_dygraph_ptq4 - ce_tests_dygraph_ptq - fi - demo_quant_quant_aware # 2个模型 - demo_quant_quant_embedding # 1个模型 - demo_quant_quant_post # 4个策略 - demo_dygraph_quant # 2个模型 - demo_quant_pact_quant_aware # 1个模型 - ce_tests_dygraph_qat # 4个模型 - #ce_tests_dygraph_qat4 - demo_quant_quant_post_hpo -} - -# 3 prune -demo_prune(){ -cd ${slim_dir}/demo/prune || catchException demo_prune -# 3.1 P0 prune - -if [ -d "models" ];then - rm -rf models -fi -export CUDA_VISIBLE_DEVICES=${cudaid1} -python train.py --model "MobileNet" --pruned_ratio 0.31 --data "imagenet" \ ---pretrained_model ../pretrain/MobileNetV1_pretrained/ --num_epochs 1 >${log_path}/prune_v1_T 2>&1 -print_info $? prune_v1_T - -#3.2 prune_fpgm -# slim_prune_fpgm_v1_T -# export CUDA_VISIBLE_DEVICES=${cudaid1} -# python train.py \ -# --model="MobileNet" \ -# --pretrained_model="../pretrain/MobileNetV1_pretrained" \ -# --data="imagenet" \ -# --pruned_ratio=0.3125 \ -# --lr=0.1 \ -# --num_epochs=1 \ -# --test_period=1 \ -# --step_epochs 30 60 90\ -# --l2_decay=3e-5 \ -# --lr_strategy="piecewise_decay" \ -# --criterion="geometry_median" \ -# --model_path="./fpgm_mobilenetv1_models" \ -# --save_inference True >${log_path}/slim_prune_fpgm_v1_T 2>&1 -# print_info $? slim_prune_fpgm_v1_T - -#slim_prune_fpgm_v2_T -export CUDA_VISIBLE_DEVICES=${cudaid1} -#v2 -50% -python train.py \ - --model="MobileNetV2" \ - --pretrained_model="../pretrain/MobileNetV2_pretrained" \ - --data="imagenet" \ - --pruned_ratio=0.325 \ - --lr=0.001 \ - --num_epochs=2 \ - --test_period=1 \ - --step_epochs 30 60 80 \ - --l2_decay=1e-4 \ - --lr_strategy="piecewise_decay" \ - --criterion="geometry_median" \ - --model_path="./output/fpgm_mobilenetv2_models" \ - --save_inference True >${log_path}/slim_prune_fpgm_v2_T 2>&1 -print_info $? 
slim_prune_fpgm_v2_T -python eval.py --model "MobileNetV2" --data "imagenet" \ ---model_path "./output/fpgm_mobilenetv2_models/0" >${log_path}/slim_prune_fpgm_v2_eval 2>&1 -print_info $? slim_prune_fpgm_v2_eval -# ResNet34 -50 -# export CUDA_VISIBLE_DEVICES=${cudaid1} -# python train.py \ -# --model="ResNet34" \ -# --pretrained_model="../pretrain/ResNet34_pretrained" \ -# --data="imagenet" \ -# --pruned_ratio=0.3125 \ -# --lr=0.001 \ -# --num_epochs=2 \ -# --test_period=1 \ -# --step_epochs 30 60 \ -# --l2_decay=1e-4 \ -# --lr_strategy="piecewise_decay" \ -# --criterion="geometry_median" \ -# --model_path="./output/fpgm_resnet34_50_models" \ -# --save_inference True >${log_path}/slim_prune_fpgm_resnet34_50_T 2>&1 -print_info $? slim_prune_fpgm_resnet34_50_T -python eval.py --model "ResNet34" --data "imagenet" \ ---model_path "./output/fpgm_resnet34_50_models/0" >${log_path}/slim_prune_fpgm_resnet34_50_eval 2>&1 -print_info $? slim_prune_fpgm_resnet34_50_eval -# ResNet34 -42 slim_prune_fpgm_resnet34_42_T -cd ${slim_dir}/demo/prune -export CUDA_VISIBLE_DEVICES=${cudaid1} -python train.py \ - --model="ResNet34" \ - --pretrained_model="../pretrain/ResNet34_pretrained" \ - --data="imagenet" \ - --pruned_ratio=0.25 \ - --num_epochs=2 \ - --test_period=1 \ - --lr_strategy="cosine_decay" \ - --criterion="geometry_median" \ - --model_path="./output/fpgm_resnet34_025_120_models" \ - --save_inference True >${log_path}/slim_prune_fpgm_resnet34_42_T 2>&1 -print_info $? slim_prune_fpgm_resnet34_42_T -python eval.py --model "ResNet34" --data "imagenet" \ ---model_path "./output/fpgm_resnet34_025_120_models/0" >${log_path}/slim_prune_fpgm_resnet34_42_eval 2>&1 -print_info $? 
slim_prune_fpgm_resnet34_42_eval -# 3.3 prune ResNet50 -export CUDA_VISIBLE_DEVICES=${cudaid1} -# 2.1版本时默认BS=256 会报显存不足,故暂时修改成128 -python train.py --model ResNet50 --pruned_ratio 0.31 --data "imagenet" \ ---save_inference True --pretrained_model ../pretrain/ResNet50_pretrained \ ---num_epochs 1 --batch_size 128 >${log_path}/prune_ResNet50_T 2>&1 -print_info $? prune_ResNet50_T -} - -# 3.4 dygraph_prune -#dy_prune_ResNet34_f42 -demo_dygraph_pruning(){ -cd ${slim_dir}/demo/dygraph/pruning || catchException demo_dygraph_pruning -ln -s ${slim_dir}/demo/data data -CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \ - --use_gpu=True \ - --model="resnet34" \ - --data="imagenet" \ - --pruned_ratio=0.25 \ - --num_epochs=1 \ - --batch_size=128 \ - --lr_strategy="cosine_decay" \ - --criterion="fpgm" \ - --model_path="./fpgm_resnet34_025_120_models" >${log_path}/dy_prune_ResNet34_f42_gpu1 2>&1 -print_info $? dy_prune_ResNet34_f42_gpu1 -#2.3 恢复训练 通过设置checkpoint选项进行恢复训练: -CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \ - --use_gpu=True \ - --model="resnet34" \ - --data="imagenet" \ - --pruned_ratio=0.25 \ - --num_epochs=2 \ - --batch_size=128 \ - --lr_strategy="cosine_decay" \ - --criterion="fpgm" \ - --model_path="./fpgm_resnet34_025_120_models" \ - --checkpoint="./fpgm_resnet34_025_120_models/0" >${log_path}/dy_prune_ResNet34_f42_gpu1_load 2>&1 -print_info $? dy_prune_ResNet34_f42_gpu1_load - -#2.4. 评估 通过调用eval.py脚本,对剪裁和重训练后的模型在测试数据上进行精度: -CUDA_VISIBLE_DEVICES=${cudaid1} python eval.py \ ---checkpoint=./fpgm_resnet34_025_120_models/1 \ ---model="resnet34" \ ---pruned_ratio=0.25 \ ---batch_size=128 >${log_path}/dy_prune_ResNet34_f42_gpu1_eval 2>&1 -print_info $? dy_prune_ResNet34_f42_gpu1_eval - -#2.5. 
导出模型 执行以下命令导出用于预测的模型: -CUDA_VISIBLE_DEVICES=${cudaid1} python export_model.py \ ---checkpoint=./fpgm_resnet34_025_120_models/final \ ---model="resnet34" \ ---pruned_ratio=0.25 \ ---output_path=./infer_final/resnet > ${log_path}/dy_prune_ResNet34_f42_gpu1_export 2>&1 -print_info $? dy_prune_ResNet34_f42_gpu1_export - -#add dy_prune_fpgm_mobilenetv1_50_T -CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \ ---log_dir="fpgm_mobilenetv1_train_log" \ -train.py \ - --model="mobilenet_v1" \ - --data="imagenet" \ - --pruned_ratio=0.3125 \ - --lr=0.1 \ - --num_epochs=1 \ - --test_period=1 \ - --step_epochs 30 60 90\ - --l2_decay=3e-5 \ - --lr_strategy="piecewise_decay" \ - --criterion="fpgm" \ - --model_path="./fpgm_mobilenetv1_models" > ${log_path}/dy_prune_fpgm_mobilenetv1_50_T 2>&1 -print_info $? dy_prune_fpgm_mobilenetv1_50_T - -#add dy_prune_fpgm_mobilenetv2_50_T -# CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \ -# --log_dir="fpgm_mobilenetv2_train_log" \ -# train.py \ -# --model="mobilenet_v2" \ -# --data="imagenet" \ -# --pruned_ratio=0.325 \ -# --lr=0.001 \ -# --num_epochs=1 \ -# --test_period=1 \ -# --step_epochs 30 60 80\ -# --l2_decay=1e-4 \ -# --lr_strategy="piecewise_decay" \ -# --criterion="fpgm" \ -# --model_path="./fpgm_mobilenetv2_models" > ${log_path}/dy_prune_fpgm_mobilenetv2_50_T 2>&1 -# print_info $? dy_prune_fpgm_mobilenetv2_50_T - -#add -CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \ ---log_dir="fpgm_resnet34_f_42_train_log" \ -train.py \ - --use_gpu=True \ - --model="resnet34" \ - --data="imagenet" \ - --pruned_ratio=0.25 \ - --batch_size=128 \ - --num_epochs=1 \ - --test_period=1 \ - --lr_strategy="cosine_decay" \ - --criterion="fpgm" \ - --model_path="./fpgm_resnet34_025_120_models" > ${log_path}/dy_prune_ResNet34_f42_gpu2 2>&1 -print_info $? 
dy_prune_ResNet34_f42_gpu2 -} - -# 3.5 st unstructured_prune -demo_unstructured_prune(){ -cd ${slim_dir}/demo/unstructured_prune || catchException demo_unstructured_prune -# 注意,上述命令中的batch_size为多张卡上总的batch_size,即一张卡的batch_size为256。 -## sparsity: -30%, accuracy: 70%/89% -export CUDA_VISIBLE_DEVICES=${cudaid1} -python train.py \ ---batch_size 256 \ ---pretrained_model ../pretrain/MobileNetV1_pretrained \ ---lr 0.05 \ ---pruning_mode threshold \ ---threshold 0.01 \ ---data imagenet \ ---lr_strategy piecewise_decay \ ---step_epochs 1 2 3 \ ---num_epochs 1 \ ---test_period 1 \ ---model_period 1 \ ---model_path st_unstructured_models >${log_path}/st_unstructured_prune_threshold_T 2>&1 -print_info $? st_unstructured_prune_threshold_T -# eval -python evaluate.py \ - --pruned_model=st_unstructured_models \ - --data="imagenet" >${log_path}/st_unstructured_prune_threshold_eval 2>&1 -print_info $? st_unstructured_prune_threshold_eval - -## sparsity: -55%, accuracy: 67%+/87%+ -export CUDA_VISIBLE_DEVICES=${cudaid1} -python train.py \ ---batch_size 256 \ ---pretrained_model ../pretrain/MobileNetV1_pretrained \ ---lr 0.05 \ ---pruning_mode ratio \ ---ratio 0.55 \ ---data imagenet \ ---lr_strategy piecewise_decay \ ---step_epochs 1 2 3 \ ---num_epochs 1 \ ---test_period 1 \ ---model_period 1 \ ---model_path st_ratio_models >${log_path}/st_unstructured_prune_ratio_T 2>&1 -print_info $? st_unstructured_prune_ratio_T - -# MNIST数据集 -# python train.py \ -# --batch_size 256 \ -# --pretrained_model ../pretrain/MobileNetV1_pretrained \ -# --lr 0.05 \ -# --pruning_mode threshold \ -# --threshold 0.01 \ -# --data mnist \ -# --lr_strategy piecewise_decay \ -# --step_epochs 1 2 3 \ -# --num_epochs 1 \ -# --test_period 1 \ -# --model_period 1 \ -# --model_path st_unstructured_models_mnist >${log_path}/st_unstructured_prune_threshold_mnist_T 2>&1 -# print_info $? 
st_unstructured_prune_threshold_mnist_T -# eval -python evaluate.py \ - --pruned_model=st_unstructured_models_mnist \ - --data="mnist" >${log_path}/st_unstructured_prune_threshold_mnist_eval 2>&1 -print_info $? st_unstructured_prune_threshold_mnist_eval - -export CUDA_VISIBLE_DEVICES=${cudaid2} -python -m paddle.distributed.launch \ - --log_dir="st_unstructured_prune_gmp_log" \ - train.py \ - --batch_size 64 \ - --data imagenet \ - --pruning_mode ratio \ - --ratio 0.75 \ - --lr 0.005 \ - --model MobileNet \ - --num_epochs 1 \ - --test_period 5 \ - --model_period 10 \ - --pretrained_model ../pretrain/MobileNetV1_pretrained \ - --model_path "./models" \ - --step_epochs 71 88 \ - --initial_ratio 0.15 \ - --pruning_steps 5 \ - --stable_epochs 0 \ - --pruning_epochs 54 \ - --tunning_epochs 54 \ - --last_epoch -1 \ - --prune_params_type conv1x1_only \ - --pruning_strategy gmp > ${log_path}/st_unstructured_prune_ratio_gmp 2>&1 -print_info $? st_unstructured_prune_ratio_gmp -} -demo_dygraph_unstructured_pruning(){ -# dy_threshold -cd ${slim_dir}/demo/dygraph/unstructured_pruning || catchException demo_dygraph_unstructured_pruning -export CUDA_VISIBLE_DEVICES=${cudaid2} -## sparsity: -55%, accuracy: 67%+/87%+ -python -m paddle.distributed.launch \ ---log_dir train_dy_ratio_log train.py \ ---data imagenet \ ---lr 0.05 \ ---pruning_mode ratio \ ---ratio 0.55 \ ---batch_size 256 \ ---lr_strategy piecewise_decay \ ---step_epochs 1 2 3 \ ---num_epochs 1 \ ---test_period 1 \ ---model_period 1 \ ---model_path dy_ratio_models >${log_path}/dy_prune_ratio_T 2>&1 -print_info $? 
dy_prune_ratio_T - -## sparsity: -30%, accuracy: 70%/89% -export CUDA_VISIBLE_DEVICES=${cudaid2} -python -m paddle.distributed.launch \ ---log_dir train_dy_threshold_log train.py \ ---data imagenet \ ---lr 0.05 \ ---pruning_mode threshold \ ---threshold 0.01 \ ---batch_size 256 \ ---lr_strategy piecewise_decay \ ---step_epochs 1 2 3 \ ---num_epochs 1 \ ---test_period 1 \ ---model_period 1 \ ---model_path dy_threshold_models >${log_path}/dy_threshold_prune_T 2>&1 -print_info $? dy_threshold_prune_T -# eval -python evaluate.py --pruned_model dy_threshold_models/model.pdparams \ ---data imagenet >${log_path}/dy_threshold_prune_eval 2>&1 -print_info $? dy_threshold_prune_eval - -# load -python -m paddle.distributed.launch \ ---log_dir train_dy_threshold_load_log train.py \ ---data imagenet \ ---lr 0.05 \ ---pruning_mode threshold \ ---threshold 0.01 \ ---batch_size 256 \ ---lr_strategy piecewise_decay \ ---step_epochs 1 2 3 \ ---num_epochs 3 \ ---test_period 1 \ ---model_period 1 \ ---model_path dy_threshold_models_new \ ---pretrained_model dy_threshold_models/model.pdparams \ ---last_epoch 1 > ${log_path}/dy_threshold_prune_T_load 2>&1 -print_info $? dy_threshold_prune_T_load -# cifar10 -# python train.py --data cifar10 --lr 0.05 \ -# --pruning_mode threshold \ -# --threshold 0.01 \ -# --model_period 1 \ -# --num_epochs 2 >${log_path}/dy_threshold_prune_cifar10_T 2>&1 -# print_info $? 
dy_threshold_prune_cifar10_T - -export CUDA_VISIBLE_DEVICES=${cudaid2} -python -m paddle.distributed.launch \ - --log_dir="dy_unstructured_prune_gmp_log" \ - train.py \ - --batch_size 64 \ - --data imagenet \ - --pruning_mode ratio \ - --ratio 0.75 \ - --lr 0.005 \ - --num_epochs 1 \ - --test_period 5 \ - --model_period 10 \ - --model_path "./models" \ - --step_epochs 71 88 \ - --initial_ratio 0.15 \ - --pruning_steps 100 \ - --stable_epochs 0 \ - --pruning_epochs 54 \ - --tunning_epochs 54 \ - --last_epoch -1 \ - --pruning_strategy gmp \ - --skip_params_type exclude_conv1x1 ${log_path}/dy_unstructured_prune_ratio_gmp 2>&1 -print_info $? dy_unstructured_prune_ratio_gmp -} - -################## -all_prune(){ # 7个模型 - demo_prune - demo_dygraph_pruning - demo_unstructured_prune # 4个模型 - demo_dygraph_unstructured_pruning -} - -#4 nas -demo_nas(){ -# 4.1 sa_nas_mobilenetv2 -cd ${slim_dir}/demo/nas || catchException demo_nas -model=demo_nas_sa_nas_v2_T_1card -CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py --search_steps 1 --port 8881 >${log_path}/${model} 2>&1 -print_info $? ${model} -} -demo_nas4(){ -cd ${slim_dir}/demo/nas || catchException demo_nas4 -model=sa_nas_v2_T_1card -CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py --search_steps 1 --retain_epoch 1 --port 8881 >${log_path}/${model} 2>&1 -print_info $? ${model} -# 4.2 block_sa_nas_mobilenetv2 -model=block_sa_nas_v2_T_1card -CUDA_VISIBLE_DEVICES=${cudaid1} python block_sa_nas_mobilenetv2.py --search_steps 1 --port 8883 >${log_path}/${model} 2>&1 -print_info $? ${model} - -# 4.3 rl_nas -model=rl_nas_v2_T_1card -CUDA_VISIBLE_DEVICES=${cudaid1} python rl_nas_mobilenetv2.py --search_steps 1 --port 8885 >${log_path}/${model} 2>&1 -print_info $? ${model} - -# 4.4 parl_nas -#model=parl_nas_v2_T_1card -#CUDA_VISIBLE_DEVICES=${cudaid1} python parl_nas_mobilenetv2.py \ -#--search_steps 1 --port 8887 >${log_path}/${model} 2>&1 -#print_info $? 
${model} -} - -all_nas(){ # 3 个模型 - demo_nas -} -# 5 darts -# search 1card # DARTS一阶近似搜索方法 -demo_darts(){ -cd ${slim_dir}/demo/darts || catchException demo_darts -model=darts1_search_1card -CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 \ ---use_multiprocess False \ ---batch_size 32 >${log_path}/${model} 2>&1 -print_info $? ${model} -#train -model=pcdarts_train_1card -CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --arch='PC_DARTS' \ ---epochs 1 --use_multiprocess False \ ---batch_size 32 >${log_path}/${model} 2>&1 -print_info $? ${model} -# 可视化 -#pip install graphviz -#model=slim_darts_visualize_pcdarts -#python visualize.py PC_DARTS > ${log_path}/${model} 2>&1 -#print_info $? ${model} -} - - - -slimfacenet(){ -cd ${slim_dir}/demo/slimfacenet || catchException slimfacenet -ln -s ${data_path}/slim/slimfacenet/CASIA CASIA -ln -s ${data_path}/slim/slimfacenet/lfw lfw -model=slim_slimfacenet_B75_train -CUDA_VISIBLE_DEVICES=${cudaid1} python -u train_eval.py \ ---train_data_dir=./CASIA/ --test_data_dir=./lfw/ \ ---action train --model=SlimFaceNet_B_x0_75 \ ---start_epoch 0 --total_epoch 1 >${log_path}/slim_slimfacenet_B75_train 2>&1 -print_info $? ${model} -model=slim_slimfacenet_B75_quan -CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \ ---action quant --train_data_dir=./CASIA/ \ ---test_data_dir=./lfw/ >${log_path}/slim_slimfacenet_B75_quan 2>&1 -print_info $? ${model} -model=slim_slimfacenet_B75_eval -CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \ ---action test --train_data_dir=./CASIA/ \ ---test_data_dir=./lfw/ >${log_path}/slim_slimfacenet_B75_eval 2>&1 -print_info $? ${model} -} - -all_darts(){ # 2个模型 - demo_darts - #slimfacenet 需要删掉 -} - -demo_latency(){ -cd ${slim_dir}/demo/analysis || catchException demo_latency -model=latency_mobilenet_v1_fp32 -python latency_predictor.py --model mobilenet_v1 --data_type fp32 >${log_path}/${model} 2>&1 -print_info $? 
${model} -model=latency_mobilenet_v1_int8 -python latency_predictor.py --model mobilenet_v1 --data_type int8 >${log_path}/${model} 2>&1 -print_info $? ${model} -model=latency_mobilenet_v2_fp32 -python latency_predictor.py --model mobilenet_v2 --data_type fp32 >${log_path}/${model} 2>&1 -print_info $? ${model} -model=latency_mobilenet_v2_int8 -python latency_predictor.py --model mobilenet_v2 --data_type int8 >${log_path}/${model} 2>&1 -print_info $? ${model} -} - -all_latency(){ - demo_latency -} - -#################################### -export all_case_list=(all_distillation all_quant all_prune all_nas ) - -export all_case_time=0 -declare -A all_P0case_dic -all_case_dic=(["all_distillation"]=5 ["all_quant"]=15 ["all_prune"]=1 ["all_nas"]=30 ["all_darts"]=30 ['unstructured_prune']=15 ['dy_qat1']=1) -for key in $(echo ${!all_case_dic[*]});do - all_case_time=`expr ${all_case_time} + ${all_case_dic[$key]}` -done -set -e -echo -e "\033[35m ---- P0case_list length: ${#all_case_list[*]}, cases: ${all_case_list[*]} \033[0m" -echo -e "\033[35m ---- P0case_time: $all_case_time min \033[0m" -set +e -#################################### -echo -e "\033[35m ---- start run case \033[0m" -case_num=1 -for model in ${all_case_list[*]};do - echo -e "\033[35m ---- running P0case $case_num/${#all_case_list[*]}: ${model} , task time: ${all_case_list[${model}]} min \033[0m" - ${model} - let case_num++ -done -echo -e "\033[35m ---- end run case \033[0m" - -cd ${slim_dir}/logs -FF=`ls *FAIL*|wc -l` -if [ "${FF}" -gt "0" ];then - exit 1 -else - exit 0 -fi +import sys +sys.path.append('..') +import numpy as np +import argparse +import ast +import time +import argparse +import ast +import logging +import paddle +import paddle.nn as nn +import paddle.static as static +import paddle.nn.functional as F +import paddle.vision.transforms as T +from paddle import ParamAttr +from paddleslim.analysis import flops +from paddleslim.nas import SANAS +from paddleslim.common import get_logger +from 
from optimizer import create_optimizer
import imagenet_reader

_logger = get_logger(__name__, level=logging.INFO)

# Architectures whose training program needs more FLOPs than this budget are
# skipped during the search without being trained or rewarded.
_MAX_FLOPS = 321208544

# Input resolution (height == width) used for each supported dataset.
_IMAGE_SIZES = {'cifar10': 32, 'imagenet': 224}


def build_program(main_program,
                  startup_program,
                  image_shape,
                  dataset,
                  archs,
                  args,
                  places,
                  is_test=False):
    """Populate ``main_program`` with the network, loss and metrics.

    Args:
        main_program: static Program that receives the forward (and, when
            training, the optimizer) operators.
        startup_program: static Program that receives parameter initializers.
        image_shape: list ``[channels, height, width]``; a leading batch
            dimension of ``None`` is prepended here.
        dataset: dataset object handed to ``paddle.io.DataLoader``.
        archs: callable applied to the input variable to produce features;
            a fully-connected layer of size ``args.class_dim`` is added on top.
        args: parsed command-line namespace; ``data``, ``batch_size`` and
            ``class_dim`` are read here (``create_optimizer`` receives the
            whole namespace).
        places: device place(s) forwarded to the DataLoader.
        is_test: when true, build an evaluation loader (no shuffling, keep the
            last partial batch) and do not attach an optimizer.

    Returns:
        Tuple ``(data_loader, avg_cost, acc_top1, acc_top5)``.
    """
    with static.program_guard(main_program, startup_program):
        with paddle.utils.unique_name.guard():
            data_shape = [None] + image_shape
            data = static.data(name='data', shape=data_shape, dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')
            if args.data == 'cifar10':
                paddle.assign(paddle.reshape(label, [-1, 1]), label)
            if is_test:
                data_loader = paddle.io.DataLoader(
                    dataset,
                    places=places,
                    feed_list=[data, label],
                    drop_last=False,
                    batch_size=args.batch_size,
                    return_list=False,
                    shuffle=False)
            else:
                data_loader = paddle.io.DataLoader(
                    dataset,
                    places=places,
                    feed_list=[data, label],
                    drop_last=True,
                    batch_size=args.batch_size,
                    return_list=False,
                    shuffle=True,
                    use_shared_memory=True,
                    num_workers=4)
            output = archs(data)
            output = static.nn.fc(x=output, size=args.class_dim)

            # Probabilities are only needed for the accuracy metrics.
            softmax_out = F.softmax(output)
            # F.cross_entropy applies softmax internally by default, so it
            # must be fed the raw logits; feeding softmax_out would apply
            # softmax twice and distort the loss.
            cost = F.cross_entropy(output, label=label)
            avg_cost = paddle.mean(cost)
            acc_top1 = paddle.metric.accuracy(
                input=softmax_out, label=label, k=1)
            acc_top5 = paddle.metric.accuracy(
                input=softmax_out, label=label, k=5)

            if not is_test:
                optimizer = create_optimizer(args)
                optimizer.minimize(avg_cost)
    return data_loader, avg_cost, acc_top1, acc_top5


def _create_datasets(args):
    """Return ``(train_dataset, val_dataset)`` for the dataset in ``args.data``."""
    if args.data == 'cifar10':
        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
        train_dataset = paddle.vision.datasets.Cifar10(
            mode='train', transform=transform, backend='cv2')
        val_dataset = paddle.vision.datasets.Cifar10(
            mode='test', transform=transform, backend='cv2')
    elif args.data == 'imagenet':
        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
    else:
        # Fail here with a clear message instead of an unbound-local error
        # further down.
        raise NotImplementedError(
            'data must in [cifar10, imagenet], but received: {}'.format(
                args.data))
    return train_dataset, val_dataset


def _train_and_evaluate(archs,
                        args,
                        image_shape,
                        train_dataset,
                        val_dataset,
                        places,
                        step=None,
                        max_flops=None):
    """Train one architecture for ``args.retain_epoch`` epochs and evaluate it.

    Args:
        archs: architecture callable forwarded to ``build_program``.
        args: parsed command-line namespace.
        image_shape: list ``[channels, height, width]``.
        train_dataset: dataset used for training.
        val_dataset: dataset used for evaluation.
        places: list of device places; the first one runs the executor and
            the evaluation loader.
        step: search step index, only used to tag printed/logged messages;
            ``None`` omits the tag.
        max_flops: when given, architectures whose training program exceeds
            this FLOPs count are skipped before any test program is built.

    Returns:
        Array ``[avg_cost, acc_top1, acc_top5]`` averaged over all evaluation
        batches, or ``None`` when the architecture was skipped.

    Raises:
        RuntimeError: if the evaluation loader yields no batch.
    """
    place = places[0]
    train_program = static.Program()
    test_program = static.Program()
    startup_program = static.Program()
    train_loader, avg_cost, _, _ = build_program(
        train_program, startup_program, image_shape, train_dataset, archs,
        args, places)

    current_flops = flops(train_program)
    if step is None:
        print('current_flops: {}'.format(current_flops))
        train_tag = ''
        test_tag = ''
    else:
        print('step: {}, current_flops: {}'.format(step, current_flops))
        train_tag = 'steps: {}, '.format(step)
        test_tag = 'step: {}, '.format(step)
    if max_flops is not None and current_flops > max_flops:
        return None

    test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
        test_program,
        startup_program,
        image_shape,
        val_dataset,
        archs,
        args,
        place,
        is_test=True)
    test_program = test_program.clone(for_test=True)

    exe = static.Executor(place)
    exe.run(startup_program)

    build_strategy = static.BuildStrategy()
    train_compiled_program = static.CompiledProgram(
        train_program).with_data_parallel(
            loss_name=avg_cost.name, build_strategy=build_strategy)

    train_fetches = [avg_cost.name]
    for epoch_id in range(args.retain_epoch):
        for batch_id, data in enumerate(train_loader()):
            s_time = time.time()
            outs = exe.run(train_compiled_program,
                           feed=data,
                           fetch_list=train_fetches)[0]
            # time.time() is in seconds; the log line reports milliseconds.
            batch_time = (time.time() - s_time) * 1000
            if batch_id % 10 == 0:
                _logger.info(
                    'TRAIN: {}epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
                    format(train_tag, epoch_id, batch_id, outs[0],
                           batch_time))

    test_fetches = [
        test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
    ]
    reward = []
    for batch_id, data in enumerate(test_loader()):
        batch_reward = exe.run(test_program,
                               feed=data,
                               fetch_list=test_fetches)
        # One row per fetched variable; collapse each row to a scalar.
        reward.append(np.mean(np.array(batch_reward), axis=1))

        _logger.info(
            'TEST: {}batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
            format(test_tag, batch_id, batch_reward[0], batch_reward[1],
                   batch_reward[2]))

    if not reward:
        raise RuntimeError(
            'the evaluation loader produced no batches, so no reward can be '
            'computed')

    finally_reward = np.mean(np.array(reward), axis=0)
    _logger.info(
        'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
            finally_reward[0], finally_reward[1], finally_reward[2]))
    return finally_reward


def search_mobilenetv2(config, args, image_size, is_server=True):
    """Run the SANAS search loop for ``args.search_steps`` steps.

    Each step asks the controller for an architecture, trains and evaluates
    it, and reports the top-1 accuracy back as the reward. Architectures over
    the ``_MAX_FLOPS`` budget are skipped without a reward.

    Args:
        config: search-space configuration passed to ``SANAS``.
        args: parsed command-line namespace.
        image_size: input height/width in pixels (3 channels are assumed).
        is_server: truthy to start a controller server together with the
            client, falsy to start a client only.
    """
    image_shape = [3, image_size, image_size]
    train_dataset, val_dataset = _create_datasets(args)

    places = static.cuda_places() if args.use_gpu else static.cpu_places()
    ### start a server and a client, or only a client when is_server is false
    sa_nas = SANAS(
        config,
        server_addr=(args.server_address, args.port),
        search_steps=args.search_steps,
        is_server=bool(is_server))

    for step in range(args.search_steps):
        archs = sa_nas.next_archs()[0]
        finally_reward = _train_and_evaluate(
            archs,
            args,
            image_shape,
            train_dataset,
            val_dataset,
            places,
            step=step,
            max_flops=_MAX_FLOPS)
        if finally_reward is None:
            # Over the FLOPs budget: move on to the next candidate.
            continue

        sa_nas.reward(float(finally_reward[1]))


def test_search_result(tokens, image_size, args, config):
    """Train and evaluate the architecture encoded by ``tokens``.

    Args:
        tokens: token list converted to an architecture via
            ``SANAS.tokens2arch``.
        image_size: input height/width in pixels (3 channels are assumed).
        args: parsed command-line namespace.
        config: search-space configuration passed to ``SANAS``.
    """
    places = static.cuda_places() if args.use_gpu else static.cpu_places()

    sa_nas = SANAS(
        config,
        server_addr=(args.server_address, args.port),
        search_steps=args.search_steps,
        is_server=True)

    image_shape = [3, image_size, image_size]
    train_dataset, val_dataset = _create_datasets(args)

    archs = sa_nas.tokens2arch(tokens)[0]
    # No FLOPs budget here: the given tokens are always trained and evaluated.
    _train_and_evaluate(archs, args, image_shape, train_dataset, val_dataset,
                        places)


def _parse_args():
    """Parse the command-line options of this demo script."""
    parser = argparse.ArgumentParser(
        description='SA NAS MobileNetV2 cifar10 argparse')
    parser.add_argument(
        '--use_gpu',
        type=ast.literal_eval,
        default=True,
        help='Whether to use GPU in train/test model.')
    parser.add_argument(
        '--batch_size', type=int, default=256, help='batch size.')
    parser.add_argument(
        '--class_dim', type=int, default=10, help='classify number.')
    parser.add_argument(
        '--data',
        type=str,
        default='cifar10',
        choices=['cifar10', 'imagenet'],
        help='dataset name.')
    parser.add_argument(
        '--is_server',
        type=ast.literal_eval,
        default=True,
        help='Whether to start a server.')
    parser.add_argument(
        '--search_steps',
        type=int,
        default=100,
        help='number of search steps.')
    parser.add_argument(
        '--server_address', type=str, default="", help='server ip.')
    parser.add_argument('--port', type=int, default=8881, help='server port')
    parser.add_argument(
        '--retain_epoch', type=int, default=5, help='epoch for each token.')
    parser.add_argument('--lr', type=float, default=0.1, help='learning rate.')
    return parser.parse_args()


def main():
    """Script entry point: parse options and launch the architecture search."""
    args = _parse_args()
    print(args)

    if args.data not in _IMAGE_SIZES:
        raise NotImplementedError(
            'data must in [cifar10, imagenet], but received: {}'.format(
                args.data))
    image_size = _IMAGE_SIZES[args.data]

    # One-element list holding the name of the search space.
    config = [('MobileNetV2Space')]
    paddle.enable_static()
    search_mobilenetv2(config, args, image_size, is_server=args.is_server)


if __name__ == '__main__':
    main()