Update sa_nas_mobilenetv2.py

dc44c944 · iamWHTWD · GitHub · 76356ff6 · dc44c944
隐藏空白更改
内联并排

Showing 1 changed file with 315 additions and 1028 deletions

demo/nas/sa_nas_mobilenetv2.py demo/nas/sa_nas_mobilenetv2.py +315 -1028

未找到文件。
--- a/demo/nas/sa_nas_mobilenetv2.py
+++ b/demo/nas/sa_nas_mobilenetv2.py
-#!/usr/bin/env bash
+import sys
-##################
+sys.path.append('..')
-#bash slim_ci_demo_all_case.sh $5 $6;
+import numpy as np
+import argparse
-print_info(){
+import ast
-if [ $1 -ne 0 ];then
+import time
-    mv ${log_path}/$2 ${log_path}/FAIL_$2.log
+import argparse
-    echo -e "\033[31m ${log_path}/FAIL_$2 \033[0m"
+import ast
-    echo "fail log as follow"
+import logging
-    cat  ${log_path}/FAIL_$2.log
+import paddle
-else
+import paddle.nn as nn
-    mv ${log_path}/$2 ${log_path}/SUCCESS_$2.log
+import paddle.static as static
-    echo -e "\033[32m ${log_path}/SUCCESS_$2 \033[0m"
+import paddle.nn.functional as F
-    cat  ${log_path}/SUCCESS_$2.log
+import paddle.vision.transforms as T
-fi
+from paddle import ParamAttr
-}
+from paddleslim.analysis import flops
+from paddleslim.nas import SANAS
-catchException() {
+from paddleslim.common import get_logger
-  echo $1 failed due to exception >> FAIL_Exception.log
+from optimizer import create_optimizer
-}
+import imagenet_reader
-cudaid1=$1;
+_logger = get_logger(__name__, level=logging.INFO)
-cudaid2=$2;
-echo "cudaid1,cudaid2", ${cudaid1}, ${cudaid2}
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+def build_program(main_program,
-#分布式log输出方式
+                  startup_program,
-export PADDLE_LOG_LEVEL=debug
+                  image_shape,
+                  dataset,
-export FLAGS_fraction_of_gpu_memory_to_use=0.98
+                  archs,
-# data PaddleSlim/demo/data/ILSVRC2012
+                  args,
-cd ${slim_dir}/demo
+                  places,
-if [ -d "data" ];then
+                  is_test=False):
-    rm -rf data
+    with static.program_guard(main_program, startup_program):
-fi
+        with paddle.utils.unique_name.guard():
-wget -q https://sys-p0.bj.bcebos.com/slim_ci/ILSVRC2012_data_demo.tar.gz --no-check-certificate
+            data_shape = [None] + image_shape
-tar xf ILSVRC2012_data_demo.tar.gz
+            data = static.data(name='data', shape=data_shape, dtype='float32')
-mv ILSVRC2012_data_demo data
+            label = static.data(name='label', shape=[None, 1], dtype='int64')
-# download pretrain model
+            if args.data == 'cifar10':
-root_url="http://paddle-imagenet-models-name.bj.bcebos.com"
+                paddle.assign(paddle.reshape(label, [-1, 1]), label)
-pre_models="MobileNetV1 MobileNetV2 MobileNetV3_large_x1_0_ssld ResNet101_vd MobileNetV2 ResNet34 ResNet50 ResNet50_vd"
+            if is_test:
-if [ -d "pretrain" ];then
+                data_loader = paddle.io.DataLoader(
-    rm -rf pretrain
+                    dataset,
-fi
+                    places=places,
-mkdir pretrain && cd pretrain
+                    feed_list=[data, label],
-for model in ${pre_models}
+                    drop_last=False,
-do
+                    batch_size=args.batch_size,
-    if [ ! -f ${model} ]; then
+                    return_list=False,
-        wget -q ${root_url}/${model}_pretrained.tar
+                    shuffle=False)
-        tar xf ${model}_pretrained.tar
+            else:
-    fi
+                data_loader = paddle.io.DataLoader(
-done
+                    dataset,
+                    places=places,
-# 1 dist
+                    feed_list=[data, label],
-demo_distillation_01(){
+                    drop_last=True,
-cd ${slim_dir}/demo/distillation || catchException demo_distillation
+                    batch_size=args.batch_size,
-if [ -d "output" ];then
+                    return_list=False,
-    rm -rf output
+                    shuffle=True,
-fi
+                    use_shared_memory=True,
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+                    num_workers=4)
-python distill.py --num_epochs 1 --save_inference True >${log_path}/demo_distillation_ResNet50_vd_T 2>&1
+            output = archs(data)
-print_info $? demo_distillation_ResNet50_vd_T
+            output = static.nn.fc(x=output, size=args.class_dim)
-}
+            softmax_out = F.softmax(output)
+            cost = F.cross_entropy(softmax_out, label=label)
-demo_distillation_02(){
+            avg_cost = paddle.mean(cost)
-cd ${slim_dir}/demo/distillation || catchException demo_distillation
+            acc_top1 = paddle.metric.accuracy(
-if [ -d "output" ];then
+                input=softmax_out, label=label, k=1)
-    rm -rf output
+            acc_top5 = paddle.metric.accuracy(
-fi
+                input=softmax_out, label=label, k=5)
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+            if is_test == False:
-python distill.py --num_epochs 1 --batch_size 64 --save_inference True \
+                optimizer = create_optimizer(args)
--model ResNet50 --teacher_model ResNet101_vd \
+                optimizer.minimize(avg_cost)
--teacher_pretrained_model ../pretrain/ResNet101_vd_pretrained >${log_path}/demo_distillation_ResNet101_vd_ResNet50_T 2>&1
+    return data_loader, avg_cost, acc_top1, acc_top5
-print_info $? demo_distillation_ResNet101_vd_ResNet50_T
-python distill.py --num_epochs 1 --batch_size 64 --save_inference True \
+def search_mobilenetv2(config, args, image_size, is_server=True):
--model MobileNetV2_x0_25 --teacher_model MobileNetV2 \
+    image_shape = [3, image_size, image_size]
--teacher_pretrained_model ../pretrain/MobileNetV2_pretrained >${log_path}/demo_distillation_MobileNetV2_MobileNetV2_x0_25_T 2>&1
+    if args.data == 'cifar10':
-print_info $? demo_distillation_MobileNetV2_MobileNetV2_x0_25_T
+        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
-}
+        train_dataset = paddle.vision.datasets.Cifar10(
+            mode='train', transform=transform, backend='cv2')
-demo_deep_mutual_learning(){
+        val_dataset = paddle.vision.datasets.Cifar10(
-cd ${slim_dir}/demo/deep_mutual_learning || catchException demo_deep_mutual_learning
+            mode='test', transform=transform, backend='cv2')
-export CUDA_VISIBLE_DEVICES=${cudaid1}
-model=dml_mv1_mv1_gpu1
+    elif args.data == 'imagenet':
-CUDA_VISIBLE_DEVICES=${cudaid1}
+        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
-python dml_train.py --epochs 1 >${log_path}/${model} 2>&1
+        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
-print_info $? ${model}
-model=dml_mv1_res50_gpu1
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
-CUDA_VISIBLE_DEVICES=${cudaid1}
+    place = places[0]
-python dml_train.py --models='mobilenet-resnet50' --batch_size 128 --epochs 1 >${log_path}/${model} 2>&1
+    if is_server:
-print_info $? ${model}
+        ### start a server and a client
-}
+        sa_nas = SANAS(
+            config,
-all_distillation(){ # 大数据 5个模型
+            server_addr=(args.server_address, args.port),
-    demo_distillation_01   # 3
+            search_steps=args.search_steps,
-    #demo_distillation_02
+            is_server=True)
-    #demo_deep_mutual_learning   # 2
+    else:
-}
+        ### start a client
-# 2.1 quant/quant_aware 使用小数据集即可
+        sa_nas = SANAS(
-demo_quant_quant_aware(){
+            config,
-cd ${slim_dir}/demo/quant/quant_aware || catchException demo_quant_quant_aware
+            server_addr=(args.server_address, args.port),
-if [ -d "output" ];then
+            search_steps=args.search_steps,
-    rm -rf output
+            is_server=False)
-fi
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+    for step in range(args.search_steps):
-# 2.1版本时默认BS=256会报显存不足，故暂时修改成128
+        archs = sa_nas.next_archs()[0]
-python train.py --model MobileNet --pretrained_model ../../pretrain/MobileNetV1_pretrained \
--checkpoint_dir ./output/mobilenetv1 --num_epochs 1 --batch_size 128 >${log_path}/demo_quant_quant_aware_v1 2>&1
+        train_program = static.Program()
-print_info $? demo_quant_quant_aware_v1
+        test_program = static.Program()
+        startup_program = static.Program()
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+        train_loader, avg_cost, acc_top1, acc_top5 = build_program(
-python train.py --model ResNet34 \
+            train_program, startup_program, image_shape, train_dataset, archs,
--pretrained_model ../../pretrain/ResNet34_pretrained \
+            args, places)
--checkpoint_dir ./output/ResNet34 --num_epochs 1 >${log_path}/demo_quant_quant_aware_ResNet34_T 2>&1
-print_info $? demo_quant_quant_aware_ResNet34_T
+        current_flops = flops(train_program)
-}
+        print('step: {}, current_flops: {}'.format(step, current_flops))
-# 2.2 quant/quant_embedding
+        if current_flops > int(321208544):
-demo_quant_quant_embedding(){
+            continue
-cd ${slim_dir}/demo/quant/quant_embedding || catchException demo_quant_quant_embedding
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+        test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
-# 先使用word2vec的demo数据进行一轮训练，比较量化前infer结果同量化后infer结果different
+            test_program,
-if [ -d "data" ];then
+            startup_program,
-    rm -rf data
+            image_shape,
-fi
+            val_dataset,
-wget -q https://sys-p0.bj.bcebos.com/slim_ci/word_2evc_demo_data.tar.gz --no-check-certificate
+            archs,
-tar xf word_2evc_demo_data.tar.gz
+            args,
-mv word_2evc_demo_data data
+            place,
-if [ -d "v1_cpu5_b100_lr1dir" ];then
+            is_test=True)
-    rm -rf v1_cpu5_b100_lr1dir
+        test_program = test_program.clone(for_test=True)
-fi
-OPENBLAS_NUM_THREADS=1 CPU_NUM=5 python train.py --train_data_dir data/convert_text8 \
+        exe = static.Executor(place)
--dict_path data/test_build_dict --num_passes 1 --batch_size 100 --model_output_dir v1_cpu5_b100_lr1dir \
+        exe.run(startup_program)
- --base_lr 1.0 --print_batch 1000 --with_speed --is_sparse >${log_path}/quant_em_word2vec_T 2>&1
-print_info $? quant_em_word2vec_T
+        build_strategy = static.BuildStrategy()
-# 量化前infer
+        train_compiled_program = static.CompiledProgram(
-python infer.py --infer_epoch --test_dir data/test_mid_dir \
+            train_program).with_data_parallel(
--dict_path data/test_build_dict_word_to_id_ \
+                loss_name=avg_cost.name, build_strategy=build_strategy)
--batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/  \
+        for epoch_id in range(args.retain_epoch):
--start_index 0 --last_index 0 >${log_path}/quant_em_infer1 2>&1
+            for batch_id, data in enumerate(train_loader()):
-print_info $? quant_em_infer1
+                fetches = [avg_cost.name]
-# 量化后infer
+                s_time = time.time()
-python infer.py --infer_epoch --test_dir data/test_mid_dir \
+                outs = exe.run(train_compiled_program,
--dict_path data/test_build_dict_word_to_id_ \
+                               feed=data,
--batch_size 20000 --model_dir v1_cpu5_b100_lr1dir/  --start_index 0 \
+                               fetch_list=fetches)[0]
--last_index 0 --emb_quant True >${log_path}/quant_em_infer2 2>&1
+                batch_time = time.time() - s_time
-print_info $? quant_em_infer2
+                if batch_id % 10 == 0:
-}
+                    _logger.info(
-# 2.3 quan_post # 小数据集
+                        'TRAIN: steps: {}, epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
-demo_quant_quant_post(){
+                        format(step, epoch_id, batch_id, outs[0], batch_time))
-# 20210425 新增4种离线量化方法
-cd ${slim_dir}/demo/quant/quant_post || catchException demo_quant_quant_post
+        reward = []
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+        for batch_id, data in enumerate(test_loader()):
-# 1 导出模型
+            test_fetches = [
-python export_model.py --model "MobileNet" --pretrained_model ../../pretrain/MobileNetV1_pretrained \
+                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
--data imagenet >${log_path}/st_quant_post_v1_export 2>&1
+            ]
-print_info $? st_quant_post_v1_export
+            batch_reward = exe.run(test_program,
-# 量化前eval
+                                   feed=data,
-python eval.py --model_path ./inference_model/MobileNet --model_name model \
+                                   fetch_list=test_fetches)
--params_name weights >${log_path}/st_quant_post_v1_eval1 2>&1
+            reward_avg = np.mean(np.array(batch_reward), axis=1)
-print_info $? st_quant_post_v1_eval1
+            reward.append(reward_avg)
-# 3 离线量化
+            _logger.info(
-# 4 量化后eval
+                'TEST: step: {}, batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
-for algo in hist avg mse
+                format(step, batch_id, batch_reward[0], batch_reward[1],
-do
+                       batch_reward[2]))
-## 不带bc 离线量化
-echo "quant_post train no bc " ${algo}
+        finally_reward = np.mean(np.array(reward), axis=0)
-python quant_post.py --model_path ./inference_model/MobileNet \
+        _logger.info(
--save_path ./quant_model/${algo}/MobileNet \
+            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
--model_filename model --params_filename weights --algo ${algo} >${log_path}/st_quant_post_v1_T_${algo} 2>&1
+                finally_reward[0], finally_reward[1], finally_reward[2]))
-print_info $? st_quant_post_v1_T_${algo}
-# 量化后eval
+        sa_nas.reward(float(finally_reward[1]))
-echo "quant_post eval no bc " ${algo}
-python eval.py --model_path ./quant_model/${algo}/MobileNet --model_name __model__ \
--params_name __params__ > ${log_path}/st_quant_post_${algo}_eval2 2>&1
+def test_search_result(tokens, image_size, args, config):
-print_info $? st_quant_post_${algo}_eval2
+    places = static.cuda_places() if args.use_gpu else static.cpu_places()
+    place = places[0]
-# 带bc参数的 离线量化
-echo "quant_post train bc " ${algo}
+    sa_nas = SANAS(
-python quant_post.py --model_path ./inference_model/MobileNet \
+        config,
--save_path ./quant_model/${algo}_bc/MobileNet \
+        server_addr=(args.server_address, args.port),
--model_filename model --params_filename weights \
+        search_steps=args.search_steps,
--algo ${algo} --bias_correction True >${log_path}/st_quant_post_T_${algo}_bc 2>&1
+        is_server=True)
-print_info $? st_quant_post_T_${algo}_bc
+    image_shape = [3, image_size, image_size]
-# 量化后eval
+    if args.data == 'cifar10':
-echo "quant_post eval bc " ${algo}
+        transform = T.Compose([T.Transpose(), T.Normalize([127.5], [127.5])])
-python eval.py --model_path ./quant_model/${algo}_bc/MobileNet --model_name __model__ \
+        train_dataset = paddle.vision.datasets.Cifar10(
--params_name __params__ > ${log_path}/st_quant_post_${algo}_bc_eval2 2>&1
+            mode='train', transform=transform, backend='cv2')
-print_info $? st_quant_post_${algo}_bc_eval2
+        val_dataset = paddle.vision.datasets.Cifar10(
+            mode='test', transform=transform, backend='cv2')
-done
-}
+    elif args.data == 'imagenet':
+        train_dataset = imagenet_reader.ImageNetDataset(mode='train')
-# 2.3 quant_post_hpo # 小数据集
+        val_dataset = imagenet_reader.ImageNetDataset(mode='val')
-demo_quant_quant_post_hpo(){
+    archs = sa_nas.tokens2arch(tokens)[0]
-cd ${slim_dir}/demo/quant/quant_post_hpo || catchException demo_quant_quant_post_hpo
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+    train_program = static.Program()
-# 1.导出模型
+    test_program = static.Program()
-python ../quant_post/export_model.py \
+    startup_program = static.Program()
--model "MobileNet" \
+    train_loader, avg_cost, acc_top1, acc_top5 = build_program(
--pretrained_model ../../pretrain/MobileNetV1_pretrained \
+        train_program, startup_program, image_shape, train_dataset, archs, args,
--data imagenet > ${log_path}/st_quant_post__hpo_v1_export 2>&1
+        places)
-print_info $? st_quant_post__hpo_v1_export
-# 2. quant_post_hpo 设置max_model_quant_count=2
+    current_flops = flops(train_program)
-python quant_post_hpo.py  \
+    print('current_flops: {}'.format(current_flops))
--use_gpu=True     \
+    test_loader, test_avg_cost, test_acc_top1, test_acc_top5 = build_program(
--model_path="./inference_model/MobileNet/"   \
+        test_program,
--save_path="./inference_model/MobileNet_quant/"   \
+        startup_program,
--model_filename="model"    \
+        image_shape,
--params_filename="weights"  \
+        val_dataset,
--max_model_quant_count=2 > ${log_path}/st_quant_post_hpo 2>&1
+        archs,
-print_info $? st_quant_post_hpo
+        args,
-# 3. 量化后eval
+        place,
-python ../quant_post/eval.py \
+        is_test=True)
--model_path ./inference_model/MobileNet_quant \
--model_name __model__ \
+    test_program = test_program.clone(for_test=True)
--params_name __params__ > ${log_path}/st_quant_post_hpo_eval 2>&1
-print_info $? st_quant_post_hpo_eval
+    exe = static.Executor(place)
+    exe.run(startup_program)
-}
+    build_strategy = static.BuildStrategy()
-#2.4
+    train_compiled_program = static.CompiledProgram(
-demo_quant_pact_quant_aware(){
+        train_program).with_data_parallel(
-cd ${slim_dir}/demo/quant/pact_quant_aware || catchException demo_quant_pact_quant_aware
+            loss_name=avg_cost.name, build_strategy=build_strategy)
-export CUDA_VISIBLE_DEVICES=${cudaid1}
+    for epoch_id in range(args.retain_epoch):
-# 普通量化,使用小数据集即可
+        for batch_id, data in enumerate(train_loader()):
-# 2.1版本时默认BS=128 会报显存不足，故暂时修改成64
+            fetches = [avg_cost.name]
-python train.py --model MobileNetV3_large_x1_0 \
+            s_time = time.time()
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
+            outs = exe.run(train_compiled_program,
--num_epochs 1 --lr 0.0001 --use_pact False --batch_size 128 >${log_path}/demo_quant_pact_quant_aware_v3_nopact 2>&1
+                           feed=data,
-print_info $? demo_quant_pact_quant_aware_v3_nopact
+                           fetch_list=fetches)[0]
-python train.py --model MobileNetV3_large_x1_0 \
+            batch_time = time.time() - s_time
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
+            if batch_id % 10 == 0:
--num_epochs 1 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \
+                _logger.info(
--step_epochs 2 --l2_decay 1e-5 >${log_path}/demo_quant_pact_quant_aware_v3 2>&1
+                    'TRAIN: epoch: {}, batch: {}, cost: {}, batch_time: {}ms'.
-print_info $? demo_quant_pact_quant_aware_v3
+                    format(epoch_id, batch_id, outs[0], batch_time))
-# load
-python train.py --model MobileNetV3_large_x1_0 \
+        reward = []
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
+        for batch_id, data in enumerate(test_loader()):
--num_epochs 2 --lr 0.0001 --use_pact True --batch_size 64 --lr_strategy=piecewise_decay \
+            test_fetches = [
--step_epochs 20 --l2_decay 1e-5 \
+                test_avg_cost.name, test_acc_top1.name, test_acc_top5.name
--checkpoint_dir ./output/MobileNetV3_large_x1_0/0 \
+            ]
--checkpoint_epoch 0 >${log_path}/demo_quant_pact_quant_aware_v3_load 2>&1
+            batch_reward = exe.run(test_program,
-print_info $? demo_quant_pact_quant_aware_v3_load
+                                   feed=data,
-}
+                                   fetch_list=test_fetches)
+            reward_avg = np.mean(np.array(batch_reward), axis=1)
-# 2.5
+            reward.append(reward_avg)
-demo_dygraph_quant(){
-cd ${slim_dir}/demo/dygraph/quant || catchException demo_dygraph_quant
+            _logger.info(
-CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --model='mobilenet_v1' \
+                'TEST: batch: {}, avg_cost: {}, acc_top1: {}, acc_top5: {}'.
--pretrained_model '../../pretrain/MobileNetV1_pretrained' \
+                format(batch_id, batch_reward[0], batch_reward[1], batch_reward[
--num_epochs 1 \
+                    2]))
--batch_size 128 \
-> ${log_path}/dy_quant_v1_gpu1 2>&1
+        finally_reward = np.mean(np.array(reward), axis=0)
-print_info $? dy_quant_v1_gpu1
+        _logger.info(
-# dy_pact_v3
+            'FINAL TEST: avg_cost: {}, acc_top1: {}, acc_top5: {}'.format(
-CUDA_VISIBLE_DEVICES=${cudaid1}  python train.py  --lr=0.001 \
+                finally_reward[0], finally_reward[1], finally_reward[2]))
--batch_size 128 \
--use_pact=True --num_epochs=1 --l2_decay=2e-5 --ls_epsilon=0.1 \
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
+if __name__ == '__main__':
--num_epochs 1 > ${log_path}/dy_pact_quant_v3_gpu1 2>&1
-print_info $? dy_pact_quant_v3_gpu1
+    parser = argparse.ArgumentParser(
-# 多卡训练，以0到3号卡为例
+        description='SA NAS MobileNetV2 cifar10 argparase')
-CUDA_VISIBLE_DEVICES=${cudaid2}  python -m paddle.distributed.launch \
+    parser.add_argument(
-train.py  --lr=0.001 \
+        '--use_gpu',
--pretrained_model ../../pretrain/MobileNetV3_large_x1_0_ssld_pretrained \
+        type=ast.literal_eval,
--use_pact=True --num_epochs=1 \
+        default=True,
--l2_decay=2e-5 \
+        help='Whether to use GPU in train/test model.')
--ls_epsilon=0.1 \
+    parser.add_argument(
--batch_size=128 \
+        '--batch_size', type=int, default=256, help='batch size.')
--model_save_dir output > ${log_path}/dy_pact_quant_v3_gpu4 2>&1
+    parser.add_argument(
-print_info $? dy_pact_quant_v3_gpu4
+        '--class_dim', type=int, default=10, help='classify number.')
-}
+    parser.add_argument(
-# 2.6
+        '--data',
-ce_tests_dygraph_qat(){
+        type=str,
-cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat
+        default='cifar10',
-ln -s ${slim_dir}/demo/data/ILSVRC2012
+        choices=['cifar10', 'imagenet'],
-test_samples=1000  # if set as -1, use all test samples
+        help='server address.')
-data_path='./ILSVRC2012/'
+    parser.add_argument(
-batch_size=16
+        '--is_server',
-epoch=1
+        type=ast.literal_eval,
-lr=0.0001
+        default=True,
-num_workers=1
+        help='Whether to start a server.')
-output_dir=$PWD/output_models
+    parser.add_argument(
-for model in mobilenet_v1
+        '--search_steps',
-do
+        type=int,
-#    if [ $1 == nopact ];then
+        default=100,
-        # 1 quant train
+        help='controller server number.')
-        echo "------1 nopact train--------", ${model}
+    parser.add_argument(
-        export CUDA_VISIBLE_DEVICES=${cudaid1}
+        '--server_address', type=str, default="", help='server ip.')
-        python ./src/qat.py \
+    parser.add_argument('--port', type=int, default=8881, help='server port')
-        --arch=${model} \
+    parser.add_argument(
-        --data=${data_path} \
+        '--retain_epoch', type=int, default=5, help='epoch for each token.')
-        --epoch=${epoch} \
+    parser.add_argument('--lr', type=float, default=0.1, help='learning rate.')
-        --batch_size=32 \
+    args = parser.parse_args()
-        --num_workers=${num_workers} \
+    print(args)
-        --lr=${lr} \
-        --output_dir=${output_dir} \
+    if args.data == 'cifar10':
-        --enable_quant > qat_${model}_gpu1_nw1 2>&1
+        image_size = 32
-        # 2 eval before save quant
+        block_num = 3
-        echo "--------2 eval before save quant -------------", ${model}
+    elif args.data == 'imagenet':
-        python ./src/eval.py \
+        image_size = 224
-        --model_path=./output_models/quant_dygraph/${model} \
+        block_num = 6
-        --data_dir=${data_path} \
+    else:
-        --test_samples=${test_samples} \
+        raise NotImplementedError(
-        --batch_size=${batch_size} > eval_before_save_${model} 2>&1
+            'data must in [cifar10, imagenet], but received: {}'.format(
-        # 3 CPU上部署量化模型,需要使用`test/save_quant_model.py`脚本进行模型转换。
+                args.data))
-        echo "--------3 save_nopact_quant_model-------------", ${model}
-        python src/save_quant_model.py \
+    config = [('MobileNetV2Space')]
-          --load_model_path output_models/quant_dygraph/${model} \
+    paddle.enable_static()
-          --save_model_path int8_models/${model} > save_quant_${model} 2>&1
+    search_mobilenetv2(config, args, image_size, is_server=args.is_server)
-        # 4
-        echo "--------4 CPU eval after save nopact quant -------------", ${model}
-        export CUDA_VISIBLE_DEVICES=
-        python ./src/eval.py \
-        --model_path=./int8_models/${model} \
-        --data_dir=${data_path} \
-        --test_samples=${test_samples} \
-        --batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1
-#    elif [ $1 == pact ];then
-    # 1 pact quant train
-        echo "------1 pact train--------", ${model}
-        export CUDA_VISIBLE_DEVICES=${cudaid1}
-        python ./src/qat.py \
-        --arch=${model} \
-        --data=${data_path} \
-        --epoch=${epoch} \
-        --batch_size=32 \
-        --num_workers=${num_workers} \
-        --lr=${lr} \
-        --output_dir=$PWD/output_models_pact/ \
-        --enable_quant \
-        --use_pact > pact_qat_${model}_gpu1_nw1 2>&1
-        # 2 eval before save quant
-        echo "--------2 eval before save pact quant -------------", ${model}
-        python ./src/eval.py \
-        --model_path=./output_models_pact/quant_dygraph/${model} \
-        --data_dir=${data_path} \
-        --test_samples=${test_samples} \
-        --batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1
-        echo "--------3  save pact quant -------------", ${model}
-        python src/save_quant_model.py \
-          --load_model_path output_models_pact/quant_dygraph/${model} \
-          --save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1
-        echo "--------4 CPU eval after save pact quant -------------", ${model}
-        python ./src/eval.py \
-        --model_path=./int8_models_pact/${model} \
-        --data_dir=${data_path} \
-        --test_samples=${test_samples} \
-        --batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1
-#    fi
-done
-}
-ce_tests_dygraph_qat(){
-cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_qat4
-ln -s ${slim_dir}/demo/data/ILSVRC2012
-test_samples=1000  # if set as -1, use all test samples
-data_path='./ILSVRC2012/'
-batch_size=16
-epoch=1
-lr=0.0001
-num_workers=1
-output_dir=$PWD/output_models
-for model in mobilenet_v1
-#for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
-do
-#    if [ $1 == nopact ];then
-        # 1 quant train
-        echo "------1 nopact train--------", ${model}
-        export CUDA_VISIBLE_DEVICES=${cudaid1}
-        python ./src/qat.py \
-        --arch=${model} \
-        --data=${data_path} \
-        --epoch=${epoch} \
-        --batch_size=32 \
-        --num_workers=${num_workers} \
-        --lr=${lr} \
-        --output_dir=${output_dir} \
-        --enable_quant > qat_${model}_gpu1_nw1 2>&1
-        # 2 eval before save quant
-        echo "--------2 eval before save quant -------------", ${model}
-        python ./src/eval.py \
-        --model_path=./output_models/quant_dygraph/${model} \
-        --data_dir=${data_path} \
-        --test_samples=${test_samples} \
-        --batch_size=${batch_size} > eval_before_save_${model} 2>&1
-        # 3 CPU上部署量化模型,需要使用`test/save_quant_model.py`脚本进行模型转换。
-        echo "--------3 save_nopact_quant_model-------------", ${model}
-        python src/save_quant_model.py \
-          --load_model_path output_models/quant_dygraph/${model} \
-          --save_model_path int8_models/${model} > save_quant_${model} 2>&1
-        # 4
-        echo "--------4 CPU eval after save nopact quant -------------", ${model}
-        export CUDA_VISIBLE_DEVICES=
-        python ./src/eval.py \
-        --model_path=./int8_models/${model} \
-        --data_dir=${data_path} \
-        --test_samples=${test_samples} \
-        --batch_size=${batch_size} > cpu_eval_after_save_${model} 2>&1
-#    elif [ $1 == pact ];then
-    # 1 pact quant train
-        echo "------1 pact train--------", ${model}
-        export CUDA_VISIBLE_DEVICES=${cudaid1}
-        python ./src/qat.py \
-        --arch=${model} \
-        --data=${data_path} \
-        --epoch=${epoch} \
-        --batch_size=32 \
-        --num_workers=${num_workers} \
-        --lr=${lr} \
-        --output_dir=$PWD/output_models_pact/ \
-        --enable_quant \
-        --use_pact > pact_qat_${model}_gpu1_nw1 2>&1
-        # 2 eval before save quant
-        echo "--------2 eval before save pact quant -------------", ${model}
-        python ./src/eval.py \
-        --model_path=./output_models_pact/quant_dygraph/${model} \
-        --data_dir=${data_path} \
-        --test_samples=${test_samples} \
-        --batch_size=${batch_size} > eval_before_pact_save_${model} 2>&1
-        echo "--------3  save pact quant -------------", ${model}
-        python src/save_quant_model.py \
-          --load_model_path output_models_pact/quant_dygraph/${model} \
-          --save_model_path int8_models_pact/${model} > save_pact_quant_${model} 2>&1
-        echo "--------4 CPU eval after save pact quant -------------", ${model}
-        python ./src/eval.py \
-        --model_path=./int8_models_pact/${model} \
-        --data_dir=${data_path} \
-        --test_samples=${test_samples} \
-        --batch_size=${batch_size} > cpu_eval_after_pact_save_${model} 2>&1
-#    fi
-done
-}
-ce_tests_dygraph_ptq(){
-cd ${slim_dir}/ce_tests/dygraph/quant || catchException ce_tests_dygraph_ptq4
-ln -s ${slim_dir}/demo/data/ILSVRC2012
-test_samples=1000  # if set as -1, use all test samples
-data_path='./ILSVRC2012/'
-batch_size=32
-epoch=1
-output_dir="./output_ptq"
-quant_batch_num=10
-quant_batch_size=10
-for model in mobilenet_v1
-#for model in mobilenet_v1 mobilenet_v2 resnet50 vgg16
-do
-    echo "--------quantize model: ${model}-------------"
-    export CUDA_VISIBLE_DEVICES=${cudaid1}
-    # save ptq quant model
-    python ./src/ptq.py \
-        --data=${data_path} \
-        --arch=${model} \
-        --quant_batch_num=${quant_batch_num} \
-        --quant_batch_size=${quant_batch_size} \
-        --output_dir=${output_dir} > ${log_path}/ptq_${model} 2>&1
-        print_info $? ptq_${model}
-    echo "-------- eval fp32_infer model -------------", ${model}
-    python ./src/test.py \
-        --model_path=${output_dir}/${model}/fp32_infer \
-        --data_dir=${data_path} \
-        --batch_size=${batch_size} \
-        --use_gpu=True \
-        --test_samples=${test_samples} \
-        --ir_optim=False > ${log_path}/ptq_eval_fp32_${model} 2>&1
-        print_info $? ptq_eval_fp32_${model}
-    echo "-------- eval int8_infer model -------------", ${model}
-    python ./src/test.py \
-        --model_path=${output_dir}/${model}/int8_infer \
-        --data_dir=${data_path} \
-        --batch_size=${batch_size} \
-        --use_gpu=False \
-        --test_samples=${test_samples} \
-        --ir_optim=False > ${log_path}/ptq_eval_int8_${model} 2>&1
-        print_info $? ptq_eval_int8_${model}
-done
-}
-#用于更新release分支下无ce_tests_dygraph_ptq case；release分支设置is_develop="False"
-is_develop="True"
-all_quant(){ # 10个模型
-    if [ "${is_develop}" == "True" ];then
-      #ce_tests_dygraph_ptq4
-      ce_tests_dygraph_ptq
-    fi
-    demo_quant_quant_aware    # 2个模型
-    demo_quant_quant_embedding  # 1个模型
-    demo_quant_quant_post   # 4个策略
-    demo_dygraph_quant    # 2个模型
-    demo_quant_pact_quant_aware  # 1个模型
-    ce_tests_dygraph_qat  # 4个模型
-    #ce_tests_dygraph_qat4
-    demo_quant_quant_post_hpo
-}
-# 3 prune
-demo_prune(){
-cd ${slim_dir}/demo/prune  || catchException demo_prune
-# 3.1 P0 prune
-if [ -d "models" ];then
-    rm -rf models
-fi
-export CUDA_VISIBLE_DEVICES=${cudaid1}
-python train.py --model "MobileNet" --pruned_ratio 0.31 --data "imagenet" \
--pretrained_model ../pretrain/MobileNetV1_pretrained/ --num_epochs 1 >${log_path}/prune_v1_T 2>&1
-print_info $? prune_v1_T
-#3.2 prune_fpgm
-# slim_prune_fpgm_v1_T
-# export CUDA_VISIBLE_DEVICES=${cudaid1}
-# python train.py \
-#     --model="MobileNet" \
-#     --pretrained_model="../pretrain/MobileNetV1_pretrained" \
-#     --data="imagenet" \
-#     --pruned_ratio=0.3125 \
-#     --lr=0.1 \
-#     --num_epochs=1 \
-#     --test_period=1 \
-#     --step_epochs 30 60 90\
-#     --l2_decay=3e-5 \
-#     --lr_strategy="piecewise_decay" \
-#     --criterion="geometry_median" \
-#     --model_path="./fpgm_mobilenetv1_models" \
-#     --save_inference True  >${log_path}/slim_prune_fpgm_v1_T 2>&1
-# print_info $? slim_prune_fpgm_v1_T
-#slim_prune_fpgm_v2_T
-export CUDA_VISIBLE_DEVICES=${cudaid1}
-#v2 -50%
-python train.py \
-    --model="MobileNetV2" \
-    --pretrained_model="../pretrain/MobileNetV2_pretrained" \
-    --data="imagenet" \
-    --pruned_ratio=0.325 \
-    --lr=0.001 \
-    --num_epochs=2 \
-    --test_period=1 \
-    --step_epochs 30 60 80 \
-    --l2_decay=1e-4 \
-    --lr_strategy="piecewise_decay" \
-    --criterion="geometry_median" \
-    --model_path="./output/fpgm_mobilenetv2_models" \
-    --save_inference True >${log_path}/slim_prune_fpgm_v2_T 2>&1
-print_info $? slim_prune_fpgm_v2_T
-python eval.py --model "MobileNetV2" --data "imagenet" \
--model_path "./output/fpgm_mobilenetv2_models/0" >${log_path}/slim_prune_fpgm_v2_eval 2>&1
-print_info $? slim_prune_fpgm_v2_eval
-# ResNet34 -50
-# export CUDA_VISIBLE_DEVICES=${cudaid1}
-# python train.py \
-#     --model="ResNet34" \
-#     --pretrained_model="../pretrain/ResNet34_pretrained" \
-#     --data="imagenet" \
-#     --pruned_ratio=0.3125 \
-#     --lr=0.001 \
-#     --num_epochs=2 \
-#     --test_period=1 \
-#     --step_epochs 30 60 \
-#     --l2_decay=1e-4 \
-#     --lr_strategy="piecewise_decay" \
-#     --criterion="geometry_median" \
-#     --model_path="./output/fpgm_resnet34_50_models" \
-#     --save_inference True >${log_path}/slim_prune_fpgm_resnet34_50_T 2>&1
-print_info $? slim_prune_fpgm_resnet34_50_T
-python eval.py --model "ResNet34" --data "imagenet" \
--model_path "./output/fpgm_resnet34_50_models/0" >${log_path}/slim_prune_fpgm_resnet34_50_eval 2>&1
-print_info $? slim_prune_fpgm_resnet34_50_eval
-# ResNet34 -42 slim_prune_fpgm_resnet34_42_T
-cd ${slim_dir}/demo/prune
-export CUDA_VISIBLE_DEVICES=${cudaid1}
-python train.py \
-    --model="ResNet34" \
-    --pretrained_model="../pretrain/ResNet34_pretrained" \
-    --data="imagenet" \
-    --pruned_ratio=0.25 \
-    --num_epochs=2 \
-    --test_period=1 \
-    --lr_strategy="cosine_decay" \
-    --criterion="geometry_median" \
-    --model_path="./output/fpgm_resnet34_025_120_models" \
-    --save_inference True >${log_path}/slim_prune_fpgm_resnet34_42_T 2>&1
-print_info $? slim_prune_fpgm_resnet34_42_T
-python eval.py --model "ResNet34" --data "imagenet" \
--model_path "./output/fpgm_resnet34_025_120_models/0" >${log_path}/slim_prune_fpgm_resnet34_42_eval 2>&1
-print_info $? slim_prune_fpgm_resnet34_42_eval
-# 3.3 prune ResNet50
-export CUDA_VISIBLE_DEVICES=${cudaid1}
-# 2.1版本时默认BS=256 会报显存不足，故暂时修改成128
-python train.py --model ResNet50 --pruned_ratio 0.31 --data "imagenet" \
--save_inference True --pretrained_model ../pretrain/ResNet50_pretrained \
--num_epochs 1 --batch_size 128 >${log_path}/prune_ResNet50_T 2>&1
-print_info $? prune_ResNet50_T
-}
-# 3.4 dygraph_prune
-#dy_prune_ResNet34_f42
-demo_dygraph_pruning(){
-cd ${slim_dir}/demo/dygraph/pruning  || catchException demo_dygraph_pruning
-ln -s ${slim_dir}/demo/data data
-CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
-    --use_gpu=True \
-    --model="resnet34" \
-    --data="imagenet" \
-    --pruned_ratio=0.25 \
-    --num_epochs=1 \
-    --batch_size=128 \
-    --lr_strategy="cosine_decay" \
-    --criterion="fpgm" \
-    --model_path="./fpgm_resnet34_025_120_models" >${log_path}/dy_prune_ResNet34_f42_gpu1 2>&1
-print_info $? dy_prune_ResNet34_f42_gpu1
-#2.3 恢复训练  通过设置checkpoint选项进行恢复训练：
-CUDA_VISIBLE_DEVICES=${cudaid1} python train.py \
-    --use_gpu=True \
-    --model="resnet34" \
-    --data="imagenet" \
-    --pruned_ratio=0.25 \
-    --num_epochs=2 \
-    --batch_size=128 \
-    --lr_strategy="cosine_decay" \
-    --criterion="fpgm" \
-    --model_path="./fpgm_resnet34_025_120_models" \
-    --checkpoint="./fpgm_resnet34_025_120_models/0" >${log_path}/dy_prune_ResNet34_f42_gpu1_load 2>&1
-print_info $? dy_prune_ResNet34_f42_gpu1_load
-#2.4. 评估  通过调用eval.py脚本，对剪裁和重训练后的模型在测试数据上进行精度：
-CUDA_VISIBLE_DEVICES=${cudaid1} python eval.py \
--checkpoint=./fpgm_resnet34_025_120_models/1 \
--model="resnet34" \
--pruned_ratio=0.25 \
--batch_size=128 >${log_path}/dy_prune_ResNet34_f42_gpu1_eval 2>&1
-print_info $? dy_prune_ResNet34_f42_gpu1_eval
-#2.5. 导出模型   执行以下命令导出用于预测的模型：
-CUDA_VISIBLE_DEVICES=${cudaid1} python export_model.py \
--checkpoint=./fpgm_resnet34_025_120_models/final \
--model="resnet34" \
--pruned_ratio=0.25 \
--output_path=./infer_final/resnet > ${log_path}/dy_prune_ResNet34_f42_gpu1_export 2>&1
-print_info $? dy_prune_ResNet34_f42_gpu1_export
-#add dy_prune_fpgm_mobilenetv1_50_T
-CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
--log_dir="fpgm_mobilenetv1_train_log" \
-train.py \
-    --model="mobilenet_v1" \
-    --data="imagenet" \
-    --pruned_ratio=0.3125 \
-    --lr=0.1 \
-    --num_epochs=1 \
-    --test_period=1 \
-    --step_epochs 30 60 90\
-    --l2_decay=3e-5 \
-    --lr_strategy="piecewise_decay" \
-    --criterion="fpgm" \
-    --model_path="./fpgm_mobilenetv1_models" > ${log_path}/dy_prune_fpgm_mobilenetv1_50_T 2>&1
-print_info $? dy_prune_fpgm_mobilenetv1_50_T
-#add dy_prune_fpgm_mobilenetv2_50_T
-# CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
-# --log_dir="fpgm_mobilenetv2_train_log" \
-# train.py \
-#     --model="mobilenet_v2" \
-#     --data="imagenet" \
-#     --pruned_ratio=0.325 \
-#     --lr=0.001 \
-#     --num_epochs=1 \
-#     --test_period=1 \
-#     --step_epochs 30 60 80\
-#     --l2_decay=1e-4 \
-#     --lr_strategy="piecewise_decay" \
-#     --criterion="fpgm" \
-#     --model_path="./fpgm_mobilenetv2_models" > ${log_path}/dy_prune_fpgm_mobilenetv2_50_T 2>&1
-# print_info $? dy_prune_fpgm_mobilenetv2_50_T
-#add
-CUDA_VISIBLE_DEVICES=${cudaid2} python -m paddle.distributed.launch \
--log_dir="fpgm_resnet34_f_42_train_log" \
-train.py \
-    --use_gpu=True \
-    --model="resnet34" \
-    --data="imagenet" \
-    --pruned_ratio=0.25 \
-    --batch_size=128 \
-    --num_epochs=1 \
-    --test_period=1 \
-    --lr_strategy="cosine_decay" \
-    --criterion="fpgm" \
-    --model_path="./fpgm_resnet34_025_120_models" > ${log_path}/dy_prune_ResNet34_f42_gpu2 2>&1
-print_info $? dy_prune_ResNet34_f42_gpu2
-}
-# 3.5 st unstructured_prune
-demo_unstructured_prune(){
-cd ${slim_dir}/demo/unstructured_prune  || catchException demo_unstructured_prune
-# 注意，上述命令中的batch_size为多张卡上总的batch_size，即一张卡的batch_size为256。
-## sparsity: -30%, accuracy: 70%/89%
-export CUDA_VISIBLE_DEVICES=${cudaid1}
-python train.py \
--batch_size 256 \
--pretrained_model ../pretrain/MobileNetV1_pretrained \
--lr 0.05 \
--pruning_mode threshold \
--threshold 0.01 \
--data imagenet \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 1 \
--test_period 1 \
--model_period 1 \
--model_path st_unstructured_models >${log_path}/st_unstructured_prune_threshold_T 2>&1
-print_info $? st_unstructured_prune_threshold_T
-# eval
-python evaluate.py \
-       --pruned_model=st_unstructured_models \
-       --data="imagenet"  >${log_path}/st_unstructured_prune_threshold_eval 2>&1
-print_info $? st_unstructured_prune_threshold_eval
-## sparsity: -55%, accuracy: 67%+/87%+
-export CUDA_VISIBLE_DEVICES=${cudaid1}
-python train.py \
--batch_size 256 \
--pretrained_model ../pretrain/MobileNetV1_pretrained \
--lr 0.05 \
--pruning_mode ratio \
--ratio 0.55 \
--data imagenet \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 1 \
--test_period 1 \
--model_period 1 \
--model_path st_ratio_models >${log_path}/st_unstructured_prune_ratio_T 2>&1
-print_info $? st_unstructured_prune_ratio_T
-# MNIST数据集
-# python train.py \
-# --batch_size 256 \
-# --pretrained_model ../pretrain/MobileNetV1_pretrained \
-# --lr 0.05 \
-# --pruning_mode threshold \
-# --threshold 0.01 \
-# --data mnist \
-# --lr_strategy piecewise_decay \
-# --step_epochs 1 2 3 \
-# --num_epochs 1 \
-# --test_period 1 \
-# --model_period 1 \
-# --model_path st_unstructured_models_mnist >${log_path}/st_unstructured_prune_threshold_mnist_T 2>&1
-# print_info $? st_unstructured_prune_threshold_mnist_T
-# eval
-python evaluate.py \
-       --pruned_model=st_unstructured_models_mnist \
-       --data="mnist"  >${log_path}/st_unstructured_prune_threshold_mnist_eval 2>&1
-print_info $? st_unstructured_prune_threshold_mnist_eval
-export CUDA_VISIBLE_DEVICES=${cudaid2}
-python -m paddle.distributed.launch \
-          --log_dir="st_unstructured_prune_gmp_log" \
-          train.py \
-          --batch_size 64 \
-          --data imagenet \
-          --pruning_mode ratio \
-          --ratio 0.75 \
-          --lr 0.005 \
-          --model MobileNet \
-          --num_epochs 1 \
-          --test_period 5 \
-          --model_period 10 \
-          --pretrained_model ../pretrain/MobileNetV1_pretrained \
-          --model_path "./models" \
-          --step_epochs  71 88 \
-          --initial_ratio 0.15 \
-          --pruning_steps 5 \
-          --stable_epochs 0 \
-          --pruning_epochs 54 \
-          --tunning_epochs 54 \
-          --last_epoch -1 \
-          --prune_params_type conv1x1_only \
-          --pruning_strategy gmp > ${log_path}/st_unstructured_prune_ratio_gmp 2>&1
-print_info $? st_unstructured_prune_ratio_gmp
-}
-demo_dygraph_unstructured_pruning(){
-# dy_threshold
-cd ${slim_dir}/demo/dygraph/unstructured_pruning || catchException demo_dygraph_unstructured_pruning
-export CUDA_VISIBLE_DEVICES=${cudaid2}
-## sparsity: -55%, accuracy: 67%+/87%+
-python -m paddle.distributed.launch \
--log_dir train_dy_ratio_log train.py \
--data imagenet \
--lr 0.05 \
--pruning_mode ratio \
--ratio 0.55 \
--batch_size 256 \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 1 \
--test_period 1 \
--model_period 1 \
--model_path dy_ratio_models >${log_path}/dy_prune_ratio_T 2>&1
-print_info $? dy_prune_ratio_T
-## sparsity: -30%, accuracy: 70%/89%
-export CUDA_VISIBLE_DEVICES=${cudaid2}
-python -m paddle.distributed.launch \
--log_dir train_dy_threshold_log train.py \
--data imagenet \
--lr 0.05 \
--pruning_mode threshold \
--threshold 0.01 \
--batch_size 256 \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 1 \
--test_period 1 \
--model_period 1 \
--model_path dy_threshold_models >${log_path}/dy_threshold_prune_T 2>&1
-print_info $? dy_threshold_prune_T
-# eval
-python evaluate.py --pruned_model dy_threshold_models/model.pdparams \
--data imagenet >${log_path}/dy_threshold_prune_eval 2>&1
-print_info $? dy_threshold_prune_eval
-# load
-python -m paddle.distributed.launch \
--log_dir train_dy_threshold_load_log train.py \
--data imagenet \
--lr 0.05 \
--pruning_mode threshold \
--threshold 0.01 \
--batch_size 256 \
--lr_strategy piecewise_decay \
--step_epochs 1 2 3 \
--num_epochs 3 \
--test_period 1 \
--model_period 1 \
--model_path dy_threshold_models_new \
--pretrained_model dy_threshold_models/model.pdparams \
--last_epoch 1 > ${log_path}/dy_threshold_prune_T_load 2>&1
-print_info $? dy_threshold_prune_T_load
-# cifar10
-# python train.py --data cifar10 --lr 0.05 \
-# --pruning_mode threshold \
-# --threshold 0.01 \
-# --model_period 1 \
-# --num_epochs 2 >${log_path}/dy_threshold_prune_cifar10_T 2>&1
-# print_info $? dy_threshold_prune_cifar10_T
-export CUDA_VISIBLE_DEVICES=${cudaid2}
-python -m paddle.distributed.launch \
-          --log_dir="dy_unstructured_prune_gmp_log" \
-          train.py \
-          --batch_size 64 \
-          --data imagenet \
-          --pruning_mode ratio \
-          --ratio 0.75 \
-          --lr 0.005 \
-          --num_epochs 1 \
-          --test_period 5 \
-          --model_period 10 \
-          --model_path "./models" \
-          --step_epochs 71 88 \
-          --initial_ratio 0.15 \
-          --pruning_steps 100 \
-          --stable_epochs 0 \
-          --pruning_epochs 54 \
-          --tunning_epochs 54 \
-          --last_epoch -1 \
-          --pruning_strategy gmp \
-          --skip_params_type exclude_conv1x1 ${log_path}/dy_unstructured_prune_ratio_gmp 2>&1
-print_info $? dy_unstructured_prune_ratio_gmp
-}
-##################
-all_prune(){ # 7个模型
-    demo_prune
-    demo_dygraph_pruning
-    demo_unstructured_prune   # 4个模型
-    demo_dygraph_unstructured_pruning
-}
-#4 nas
-demo_nas(){
-# 4.1 sa_nas_mobilenetv2
-cd ${slim_dir}/demo/nas  || catchException demo_nas
-model=demo_nas_sa_nas_v2_T_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py --search_steps 1 --port 8881 >${log_path}/${model} 2>&1
-print_info $? ${model}
-}
-demo_nas4(){
-cd ${slim_dir}/demo/nas || catchException demo_nas4
-model=sa_nas_v2_T_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python sa_nas_mobilenetv2.py --search_steps 1 --retain_epoch 1 --port 8881 >${log_path}/${model} 2>&1
-print_info $? ${model}
-# 4.2 block_sa_nas_mobilenetv2
-model=block_sa_nas_v2_T_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python block_sa_nas_mobilenetv2.py --search_steps 1 --port 8883 >${log_path}/${model} 2>&1
-print_info $? ${model}
-# 4.3 rl_nas
-model=rl_nas_v2_T_1card
-CUDA_VISIBLE_DEVICES=${cudaid1}  python rl_nas_mobilenetv2.py --search_steps 1 --port 8885 >${log_path}/${model} 2>&1
-print_info $? ${model}
-# 4.4 parl_nas
-#model=parl_nas_v2_T_1card
-#CUDA_VISIBLE_DEVICES=${cudaid1} python parl_nas_mobilenetv2.py \
-#--search_steps 1 --port 8887 >${log_path}/${model} 2>&1
-#print_info $? ${model}
-}
-all_nas(){ # 3 个模型
-   demo_nas
-}
-# 5 darts
-# search 1card # DARTS一阶近似搜索方法
-demo_darts(){
-cd ${slim_dir}/demo/darts || catchException demo_darts
-model=darts1_search_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python search.py --epochs 1 \
--use_multiprocess False \
--batch_size 32 >${log_path}/${model} 2>&1
-print_info $? ${model}
-#train
-model=pcdarts_train_1card
-CUDA_VISIBLE_DEVICES=${cudaid1} python train.py --arch='PC_DARTS' \
--epochs 1 --use_multiprocess False \
--batch_size 32 >${log_path}/${model} 2>&1
-print_info $? ${model}
-# 可视化
-#pip install graphviz
-#model=slim_darts_visualize_pcdarts
-#python visualize.py PC_DARTS > ${log_path}/${model} 2>&1
-#print_info $? ${model}
-}
-slimfacenet(){
-cd ${slim_dir}/demo/slimfacenet  || catchException slimfacenet
-ln -s ${data_path}/slim/slimfacenet/CASIA CASIA
-ln -s ${data_path}/slim/slimfacenet/lfw lfw
-model=slim_slimfacenet_B75_train
-CUDA_VISIBLE_DEVICES=${cudaid1} python -u train_eval.py \
--train_data_dir=./CASIA/ --test_data_dir=./lfw/ \
--action train --model=SlimFaceNet_B_x0_75 \
--start_epoch 0 --total_epoch 1 >${log_path}/slim_slimfacenet_B75_train 2>&1
-print_info $? ${model}
-model=slim_slimfacenet_B75_quan
-CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
--action quant --train_data_dir=./CASIA/ \
--test_data_dir=./lfw/  >${log_path}/slim_slimfacenet_B75_quan 2>&1
-print_info $? ${model}
-model=slim_slimfacenet_B75_eval
-CUDA_VISIBLE_DEVICES=${cudaid1} python train_eval.py \
--action test --train_data_dir=./CASIA/ \
--test_data_dir=./lfw/ >${log_path}/slim_slimfacenet_B75_eval 2>&1
-print_info $? ${model}
-}
-all_darts(){  # 2个模型
-    demo_darts
-    #slimfacenet  需要删掉
-}
-demo_latency(){
-cd ${slim_dir}/demo/analysis  || catchException demo_latency
-model=latency_mobilenet_v1_fp32
-python latency_predictor.py --model mobilenet_v1 --data_type fp32 >${log_path}/${model} 2>&1
-print_info $? ${model}
-model=latency_mobilenet_v1_int8
-python latency_predictor.py --model mobilenet_v1 --data_type int8 >${log_path}/${model} 2>&1
-print_info $? ${model}
-model=latency_mobilenet_v2_fp32
-python latency_predictor.py --model mobilenet_v2 --data_type fp32 >${log_path}/${model} 2>&1
-print_info $? ${model}
-model=latency_mobilenet_v2_int8
-python latency_predictor.py --model mobilenet_v2 --data_type int8 >${log_path}/${model} 2>&1
-print_info $? ${model}
-}
-all_latency(){
-  demo_latency
-}
-####################################
-export all_case_list=(all_distillation all_quant all_prune all_nas  )
-export all_case_time=0
-declare -A all_P0case_dic
-all_case_dic=(["all_distillation"]=5 ["all_quant"]=15 ["all_prune"]=1 ["all_nas"]=30 ["all_darts"]=30 ['unstructured_prune']=15 ['dy_qat1']=1)
-for key in $(echo ${!all_case_dic[*]});do
-    all_case_time=`expr ${all_case_time} + ${all_case_dic[$key]}`
-done
-set -e
-echo -e "\033[35m ---- P0case_list length: ${#all_case_list[*]}, cases: ${all_case_list[*]} \033[0m"
-echo -e "\033[35m ---- P0case_time: $all_case_time min \033[0m"
-set +e
-####################################
-echo -e "\033[35m ---- start run case  \033[0m"
-case_num=1
-for model in ${all_case_list[*]};do
-    echo -e "\033[35m ---- running P0case $case_num/${#all_case_list[*]}: ${model} , task time: ${all_case_list[${model}]} min \033[0m"
-    ${model}
-    let case_num++
-done
-echo -e "\033[35m ---- end run case  \033[0m"
-cd ${slim_dir}/logs
-FF=`ls *FAIL*|wc -l`
-if [ "${FF}" -gt "0" ];then
-    exit 1
-else
-    exit 0
-fi