diff --git a/slim/distillation/README.md b/slim/distillation/README.md index 23ac99a2089d6dc384b6be0b4f92c6c62eb71507..622e0aa9c2908353e98ff4e427bcad0d66fe8111 100644 --- a/slim/distillation/README.md +++ b/slim/distillation/README.md @@ -71,10 +71,10 @@ distill_loss = l2_loss('teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_1.tmp ### 执行示例 如下命令启动训练,每间隔```cfg.TRAIN.SNAPSHOT_EPOCH```会进行一次评估。 ```shell -CUDA_VISIBLE_DEVICES=0,1 -python -m paddle.distributed.launch ./slim/distill/train.py \ ---log_steps 10 --cfg ./slim/distill/cityscape_fast_scnn.yaml \ ---teacher_cfg ./slim/distill/cityscape_teacher.yaml \ +CUDA_VISIBLE_DEVICES=0,1 +python -m paddle.distributed.launch ./slim/distillation/train_distill.py \ +--log_steps 10 --cfg ./slim/distillation/cityscape.yaml \ +--teacher_cfg ./slim/distillation/cityscape_teacher.yaml \ --use_gpu \ --use_mpio \ --do_eval diff --git a/slim/distillation/model_builder.py b/slim/distillation/model_builder.py index 2f9eacd1cfacd7cee035dc64115f2578d4965b3d..f903b8dd2b635fa10070dcc3da488be66746d539 100644 --- a/slim/distillation/model_builder.py +++ b/slim/distillation/model_builder.py @@ -157,7 +157,7 @@ def export_preprocess(image): def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwargs): - print('debugggggggggg') + if not ModelPhase.is_valid_phase(phase): raise ValueError("ModelPhase {} is not valid!".format(phase)) if ModelPhase.is_train(phase): @@ -176,7 +176,6 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg # 在导出模型的时候,增加图像标准化预处理,减小预测部署时图像的处理流程 # 预测部署时只须对输入图像增加batch_size维度即可 if cfg.SLIM.KNOWLEDGE_DISTILL_IS_TEACHER: - print('teacher input:') image = main_prog.global_block()._clone_variable(kwargs['image'], force_persistable=False) label = main_prog.global_block()._clone_variable(kwargs['label'], diff --git a/slim/distillation/train_distill.py b/slim/distillation/train_distill.py index 2a415cecd529c7cff7e4ae575230200d9499a14a..c214b71e5605c57a52172bd27e5750d648b82736 100644 --- a/slim/distillation/train_distill.py +++ b/slim/distillation/train_distill.py @@ -23,7 +23,7 @@ import sys LOCAL_PATH = os.path.dirname(os.path.abspath(__file__)) SEG_PATH = os.path.join(LOCAL_PATH, "../../", "pdseg") sys.path.append(SEG_PATH) - +sys.path.append('/workspace/codes/PaddleSlim1') import argparse import pprint import random @@ -278,8 +278,6 @@ def train(cfg): label=grts, mask=masks) exe.run(teacher_startup_program) - # assert FLAGS.teacher_pretrained, "teacher_pretrained should be set" - # checkpoint.load_params(exe, teacher_program, FLAGS.teacher_pretrained) teacher_program = teacher_program.clone(for_test=True) ckpt_dir = cfg.SLIM.KNOWLEDGE_DISTILL_TEACHER_MODEL_DIR @@ -295,14 +293,14 @@ def train(cfg): 'mask': 'mask', } merge(teacher_program, fluid.default_main_program(), data_name_map, place) - distill_pairs = [['teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_1.tmp_0']] + distill_pairs = [['teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0']] def distill(pairs, weight): """ Add 3 pairs of distillation losses, each pair of feature maps is the input of teacher and student's yolov3_loss respectively """ - loss = l2_loss(pairs[0][0], pairs[0][1], masks) + loss = l2_loss(pairs[0][0], pairs[0][1]) weighted_loss = loss * weight return weighted_loss diff --git a/slim/nas/README.md b/slim/nas/README.md index 730d9bb95e89e4a6aab64203b3644c97b3822b03..2029261cb70e55af919a815ece0229bc1ffac530 100644 --- a/slim/nas/README.md +++ b/slim/nas/README.md @@ -46,7 +46,7 @@ SLIM: ## 训练与评估 执行以下命令,边训练边评估 ```shell -python -u ./slim/nas/train.py --log_steps 10 --cfg configs/cityscape.yaml --use_gpu --use_mpio \ +python -u ./slim/nas/train_nas.py --log_steps 10 --cfg configs/deeplabv3p_mobilenetv2_cityscapes.yaml --use_gpu --use_mpio \ SLIM.NAS_PORT 23333 \ SLIM.NAS_ADDRESS "" \ SLIM.NAS_SEARCH_STEPS 2 \