diff --git a/README.md b/README.md index 0fefe77bb4c078a15c4f02a6d189a240cf304de6..23558c5a9e7c099c1599da7bfa1d25ac910fe87a 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ## 简介 -PaddleSeg是基于[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的语义分割库,覆盖了DeepLabv3+, U-Net, ICNet, PSPNet, HRNet等主流分割模型。通过统一的配置,帮助用户更便捷地完成从训练到部署的全流程图像分割应用。 +PaddleSeg是基于[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的语义分割库,覆盖了DeepLabv3+, U-Net, ICNet, PSPNet, HRNet, Fast-SCNN等主流分割模型。通过统一的配置,帮助用户更便捷地完成从训练到部署的全流程图像分割应用。
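作为上述「统一的配置」工作流的一个最小示例(命令与本diff后文 `docs/lovasz_loss.md` 中的用法一致,配置文件为本PR新增的一份,`configs/` 下任意配置用法相同):

```shell
# 配置/数据校验:训练前检查配置与数据列表
python pdseg/check.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml
# 训练,--do_eval 表示训练过程中同时在验证集上评估
python pdseg/train.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml --use_gpu --do_eval
# 评估(模型路径由配置中的 TEST.TEST_MODEL 指定)
python pdseg/eval.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml --use_gpu
```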
@@ -33,7 +33,7 @@ PaddleSeg是基于[PaddlePaddle](https://www.paddlepaddle.org.cn)开发的语义 - **模块化设计** -支持U-Net, DeepLabv3+, ICNet, PSPNet, HRNet五种主流分割网络,结合预训练模型和可调节的骨干网络,满足不同性能和精度的要求;选择不同的损失函数如Dice Loss, BCE Loss等方式可以强化小目标和不均衡样本场景下的分割精度。 +支持U-Net, DeepLabv3+, ICNet, PSPNet, HRNet, Fast-SCNN六种主流分割网络,结合预训练模型和可调节的骨干网络,满足不同性能和精度的要求;选择不同的损失函数如Dice Loss, BCE Loss等方式可以强化小目标和不均衡样本场景下的分割精度。 - **高性能** @@ -163,6 +163,14 @@ A: 请将PaddlePaddle升级至1.5.2版本或以上。

   微信公众号                官方技术交流QQ群

## 更新日志 +* 2020.02.25 + + **`v0.4.0`** + * 新增适用于实时场景且不需要预训练模型的分割网络Fast-SCNN,提供基于Cityscapes的[预训练模型](./docs/model_zoo.md)1个。 + * 新增LaneNet车道线检测网络,提供[预训练模型](https://github.com/PaddlePaddle/PaddleSeg/tree/release/v0.4.0/contrib/LaneNet#%E4%B8%83-%E5%8F%AF%E8%A7%86%E5%8C%96)一个。 + * 新增基于PaddleSlim的分割库压缩策略([量化](./slim/quantization/README.md), [蒸馏](./slim/distillation/README.md), [剪枝](./slim/prune/README.md), [搜索](./slim/nas/README.md)) + + * 2019.12.15 **`v0.3.0`** diff --git a/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml b/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0dbeddd2142f4b504a130c4273be4ef77cfadef5 --- /dev/null +++ b/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml @@ -0,0 +1,49 @@ +EVAL_CROP_SIZE: (1025, 1025) # (width, height), for unpadding rangescaling and stepscaling +TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding rangescaling and stepscaling +AUG: + AUG_METHOD: u"stepscaling" # choice unpadding rangescaling and stepscaling + FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding + INF_RESIZE_VALUE: 500 # for rangescaling + MAX_RESIZE_VALUE: 600 # for rangescaling + MIN_RESIZE_VALUE: 400 # for rangescaling + MAX_SCALE_FACTOR: 2.0 # for stepscaling + MIN_SCALE_FACTOR: 0.5 # for stepscaling + SCALE_STEP_SIZE: 0.25 # for stepscaling + FLIP: True +BATCH_SIZE: 24 +DATASET: + DATA_DIR: "./dataset/MiniDeepGlobeRoadExtraction/" + IMAGE_TYPE: "rgb" # choice rgb or rgba + NUM_CLASSES: 2 + TEST_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" + TRAIN_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/train.txt" + VAL_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt" + IGNORE_INDEX: 255 + SEPARATOR: '|' +FREEZE: + MODEL_FILENAME: "model" + PARAMS_FILENAME: "params" + SAVE_DIR: "freeze_model" +MODEL: + DEFAULT_NORM_TYPE: "bn" + MODEL_NAME: "deeplabv3p" + DEEPLAB: + BACKBONE: "mobilenetv2" + DEPTH_MULTIPLIER: 1.0 + ENCODER_WITH_ASPP: False + ENABLE_DECODER: False +TEST: + TEST_MODEL: "./saved_model/lovasz_hinge_deeplabv3p_mobilenet_road/final" +TRAIN: + MODEL_SAVE_DIR: "./saved_model/lovasz_hinge_deeplabv3p_mobilenet_road/" + PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_coco/" + SNAPSHOT_EPOCH: 10 +SOLVER: + LR: 0.1 + LR_POLICY: "poly" + OPTIMIZER: "sgd" + NUM_EPOCHS: 300 + LOSS: ["lovasz_hinge_loss","bce_loss"] + LOSS_WEIGHT: + LOVASZ_HINGE_LOSS: 0.5 + BCE_LOSS: 0.5 diff --git a/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml b/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml new file mode 100755 index 0000000000000000000000000000000000000000..0c083ba509b1d6c83e852ab261da887b3c451370 --- /dev/null +++ b/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml @@ -0,0 +1,48 @@ +TRAIN_CROP_SIZE: (500, 500) # (width, height), for unpadding rangescaling and stepscaling #训练时图像裁剪尺寸(宽,高) +EVAL_CROP_SIZE: (500, 500) # (width, height), for unpadding rangescaling and stepscaling #验证时图像裁剪尺寸(宽,高) +AUG: + AUG_METHOD: "stepscaling" # choice unpadding rangescaling and stepscaling + FIX_RESIZE_SIZE: (500, 500) # (width, height), for unpadding + + INF_RESIZE_VALUE: 500 # for rangescaling + MAX_RESIZE_VALUE: 600 # for rangescaling + MIN_RESIZE_VALUE: 400 # for rangescaling + + MAX_SCALE_FACTOR: 1.25 # for stepscaling + MIN_SCALE_FACTOR: 0.75 # for stepscaling + SCALE_STEP_SIZE: 0.05 # for stepscaling + MIRROR: True + FLIP: True +BATCH_SIZE: 16 #批处理大小 +DATASET: + DATA_DIR: "./dataset/VOCtrainval_11-May-2012/VOC2012/" #图片路径 + IMAGE_TYPE: "rgb" # choice rgb 
or rgba #图片类别“RGB” + NUM_CLASSES: 21 #类别数(包括背景类别) + TEST_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/val.list" + TRAIN_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/train.list" + VAL_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/val.list" + IGNORE_INDEX: 255 + SEPARATOR: " " +MODEL: + MODEL_NAME: "deeplabv3p" + DEFAULT_NORM_TYPE: "bn" #指定norm的类型,此处提供bn和gn(默认)两种选择,分别指batch norm和group norm。 + DEEPLAB: + BACKBONE: "mobilenetv2" + DEPTH_MULTIPLIER: 1.0 + ENCODER_WITH_ASPP: False + ENABLE_DECODER: False +TRAIN: + PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_coco/" + MODEL_SAVE_DIR: "./saved_model/lovasz-softmax-voc" #模型保存路径 + SNAPSHOT_EPOCH: 10 +TEST: + TEST_MODEL: "./saved_model/lovasz-softmax-voc/final" #为测试模型路径 +SOLVER: + NUM_EPOCHS: 100 #训练epoch数,正整数 + LR: 0.0001 #初始学习率 + LR_POLICY: "poly" #学习率下降方法, 选项为poly、piecewise和cosine + OPTIMIZER: "sgd" #优化算法, 选项为sgd和adam + LOSS: ["lovasz_softmax_loss","softmax_loss"] + LOSS_WEIGHT: + LOVASZ_SOFTMAX_LOSS: 0.2 + SOFTMAX_LOSS: 0.8 diff --git a/contrib/LaneNet/README.md b/contrib/LaneNet/README.md index b86777305c160edae7a55349d719c9df2a2da4f9..1448951e900dbb8bb235be476698eb13d62f5e4c 100644 --- a/contrib/LaneNet/README.md +++ b/contrib/LaneNet/README.md @@ -108,7 +108,7 @@ SOLVER: 使用下述命令启动训练 ```shell -CUDA_VISIBLE_DEVICES=0 python -u train.py --cfg configs/lanenet.yaml --use_gpu --use_mpio --do_eval +CUDA_VISIBLE_DEVICES=0 python -u train.py --cfg configs/lanenet.yaml --use_gpu --do_eval ``` ## 六. 进行评估 diff --git a/contrib/LaneNet/eval.py b/contrib/LaneNet/eval.py index 9256c4f024e7d15c9c018c4fe5930e5b7865c7e0..025fb4e7284adefdafa4365ace347a68e2addf48 100644 --- a/contrib/LaneNet/eval.py +++ b/contrib/LaneNet/eval.py @@ -101,10 +101,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): for b in data_gen: yield b - py_reader, pred, grts, masks, accuracy, fp, fn = build_model( + data_loader, pred, grts, masks, accuracy, fp, fn = build_model( test_prog, startup_prog, phase=ModelPhase.EVAL) - py_reader.decorate_sample_generator( + data_loader.set_sample_generator( data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE) # Get device environment @@ -127,7 +127,9 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): # Use streaming confusion matrix to calculate mean_iou np.set_printoptions( precision=4, suppress=True, linewidth=160, floatmode="fixed") - fetch_list = [pred.name, grts.name, masks.name, accuracy.name, fp.name, fn.name] + fetch_list = [ + pred.name, grts.name, masks.name, accuracy.name, fp.name, fn.name + ] num_images = 0 step = 0 avg_acc = 0.0 @@ -137,7 +139,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1 timer = Timer() timer.start() - py_reader.start() + data_loader.start() while True: try: step += 1 @@ -153,7 +155,8 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): print( "[EVAL]step={} accuracy={:.4f} fp={:.4f} fn={:.4f} step/sec={:.2f} | ETA {}" - .format(step, avg_acc / num_images, avg_fp / num_images, avg_fn / num_images, speed, + .format(step, avg_acc / num_images, avg_fp / num_images, + avg_fn / num_images, speed, calculate_eta(all_step - step, speed))) timer.restart() @@ -162,7 +165,8 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): break print("[EVAL]#image={} accuracy={:.4f} fp={:.4f} fn={:.4f}".format( - num_images, 
avg_acc / num_images, avg_fp / num_images, avg_fn / num_images)) + num_images, avg_acc / num_images, avg_fp / num_images, + avg_fn / num_images)) return avg_acc / num_images, avg_fp / num_images, avg_fn / num_images diff --git a/contrib/LaneNet/models/model_builder.py b/contrib/LaneNet/models/model_builder.py index ed6c275ecd51a2fc9f7f2fdf125300ce026c0a0a..b274e95a7b761f0345923c7702f91420a2469404 100644 --- a/contrib/LaneNet/models/model_builder.py +++ b/contrib/LaneNet/models/model_builder.py @@ -25,6 +25,7 @@ from pdseg.loss import multi_softmax_with_loss from loss import discriminative_loss from models.modeling import lanenet + class ModelPhase(object): """ Standard name for model phase in PaddleSeg @@ -107,35 +108,31 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): width = cfg.EVAL_CROP_SIZE[0] height = cfg.EVAL_CROP_SIZE[1] - image_shape = [cfg.DATASET.DATA_DIM, height, width] - grt_shape = [1, height, width] + image_shape = [-1, cfg.DATASET.DATA_DIM, height, width] + grt_shape = [-1, 1, height, width] class_num = cfg.DATASET.NUM_CLASSES with fluid.program_guard(main_prog, start_prog): with fluid.unique_name.guard(): - image = fluid.layers.data( - name='image', shape=image_shape, dtype='float32') - label = fluid.layers.data( - name='label', shape=grt_shape, dtype='int32') + image = fluid.data(name='image', shape=image_shape, dtype='float32') + label = fluid.data(name='label', shape=grt_shape, dtype='int32') if cfg.MODEL.MODEL_NAME == 'lanenet': - label_instance = fluid.layers.data( + label_instance = fluid.data( name='label_instance', shape=grt_shape, dtype='int32') - mask = fluid.layers.data( - name='mask', shape=grt_shape, dtype='int32') + mask = fluid.data(name='mask', shape=grt_shape, dtype='int32') - # use PyReader when doing traning and evaluation + # use DataLoader.from_generator when doing traning and evaluation if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase): - py_reader = fluid.io.PyReader( + data_loader = fluid.io.DataLoader.from_generator( feed_list=[image, label, label_instance, mask], capacity=cfg.DATALOADER.BUF_SIZE, iterable=False, use_double_buffer=True) - loss_type = cfg.SOLVER.LOSS if not isinstance(loss_type, list): loss_type = list(loss_type) - + logits = seg_model(image, class_num) if ModelPhase.is_train(phase): @@ -144,25 +141,30 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): if cfg.MODEL.MODEL_NAME == 'lanenet': embeding_logit = logits[1] logits = logits[0] - disc_loss, _, _, l_reg = discriminative_loss(embeding_logit, label_instance, 4, - image_shape[1:], 0.5, 3.0, 1.0, 1.0, 0.001) + disc_loss, _, _, l_reg = discriminative_loss( + embeding_logit, label_instance, 4, image_shape[2:], 0.5, + 3.0, 1.0, 1.0, 0.001) if "softmax_loss" in loss_type: weight = None if cfg.MODEL.MODEL_NAME == 'lanenet': weight = get_dynamic_weight(label) - seg_loss = multi_softmax_with_loss(logits, label, mask, class_num, weight) + seg_loss = multi_softmax_with_loss(logits, label, mask, + class_num, weight) loss_valid = True valid_loss.append("softmax_loss") if not loss_valid: - raise Exception("SOLVER.LOSS: {} is set wrong. it should " - "include one of (softmax_loss, bce_loss, dice_loss) at least" - " example: ['softmax_loss']".format(cfg.SOLVER.LOSS)) + raise Exception( + "SOLVER.LOSS: {} is set wrong. 
it should " + "include one of (softmax_loss, bce_loss, dice_loss) at least" + " example: ['softmax_loss']".format(cfg.SOLVER.LOSS)) invalid_loss = [x for x in loss_type if x not in valid_loss] if len(invalid_loss) > 0: - print("Warning: the loss {} you set is invalid. it will not be included in loss computed.".format(invalid_loss)) + print( + "Warning: the loss {} you set is invalid. it will not be included in loss computed." + .format(invalid_loss)) avg_loss = disc_loss + 0.00001 * l_reg + seg_loss @@ -202,12 +204,12 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): accuracy, fp, fn = compute_metric(pred, label) if ModelPhase.is_eval(phase): - return py_reader, pred, label, mask, accuracy, fp, fn + return data_loader, pred, label, mask, accuracy, fp, fn if ModelPhase.is_train(phase): optimizer = solver.Solver(main_prog, start_prog) decayed_lr = optimizer.optimise(avg_loss) - return py_reader, avg_loss, decayed_lr, pred, label, mask, disc_loss, seg_loss, accuracy, fp, fn + return data_loader, avg_loss, decayed_lr, pred, label, mask, disc_loss, seg_loss, accuracy, fp, fn def compute_metric(pred, label): @@ -216,19 +218,27 @@ def compute_metric(pred, label): idx = fluid.layers.where(pred == 1) pix_cls_ret = fluid.layers.gather_nd(label, idx) - correct_num = fluid.layers.reduce_sum(fluid.layers.cast(pix_cls_ret, 'float32')) + correct_num = fluid.layers.reduce_sum( + fluid.layers.cast(pix_cls_ret, 'float32')) - gt_num = fluid.layers.cast(fluid.layers.shape(fluid.layers.gather_nd(label, - fluid.layers.where(label == 1)))[0], 'int64') - pred_num = fluid.layers.cast(fluid.layers.shape(fluid.layers.gather_nd(pred, idx))[0], 'int64') + gt_num = fluid.layers.cast( + fluid.layers.shape( + fluid.layers.gather_nd(label, fluid.layers.where(label == 1)))[0], + 'int64') + pred_num = fluid.layers.cast( + fluid.layers.shape(fluid.layers.gather_nd(pred, idx))[0], 'int64') accuracy = correct_num / gt_num false_pred = pred_num - correct_num - fp = fluid.layers.cast(false_pred, 'float32') / fluid.layers.cast(fluid.layers.shape(pix_cls_ret)[0], 'int64') - - label_cls_ret = fluid.layers.gather_nd(label, fluid.layers.where(label == 1)) - mis_pred = fluid.layers.cast(fluid.layers.shape(label_cls_ret)[0], 'int64') - correct_num - fn = fluid.layers.cast(mis_pred, 'float32') / fluid.layers.cast(fluid.layers.shape(label_cls_ret)[0], 'int64') + fp = fluid.layers.cast(false_pred, 'float32') / fluid.layers.cast( + fluid.layers.shape(pix_cls_ret)[0], 'int64') + + label_cls_ret = fluid.layers.gather_nd(label, + fluid.layers.where(label == 1)) + mis_pred = fluid.layers.cast(fluid.layers.shape(label_cls_ret)[0], + 'int64') - correct_num + fn = fluid.layers.cast(mis_pred, 'float32') / fluid.layers.cast( + fluid.layers.shape(label_cls_ret)[0], 'int64') accuracy.stop_gradient = True fp.stop_gradient = True fn.stop_gradient = True @@ -239,7 +249,8 @@ def get_dynamic_weight(label): label = fluid.layers.reshape(label, [-1]) unique_labels, unique_id, counts = fluid.layers.unique_with_counts(label) counts = fluid.layers.cast(counts, 'float32') - weight = 1.0 / fluid.layers.log((counts / fluid.layers.reduce_sum(counts) + 1.02)) + weight = 1.0 / fluid.layers.log( + (counts / fluid.layers.reduce_sum(counts) + 1.02)) return weight diff --git a/contrib/LaneNet/train.py b/contrib/LaneNet/train.py index 3ee9489c9b18b19b6b84615a400815a3bc33ccb2..c2f5bee7547eabe9ef5c998b197fbaf59130d679 100644 --- a/contrib/LaneNet/train.py +++ b/contrib/LaneNet/train.py @@ -232,9 +232,9 @@ def train(cfg): cfg.BATCH_SIZE_PER_DEV = 
batch_size_per_dev print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) - py_reader, avg_loss, lr, pred, grts, masks, emb_loss, seg_loss, accuracy, fp, fn = build_model( + data_loader, avg_loss, lr, pred, grts, masks, emb_loss, seg_loss, accuracy, fp, fn = build_model( train_prog, startup_prog, phase=ModelPhase.TRAIN) - py_reader.decorate_sample_generator( + data_loader.set_sample_generator( data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) exe = fluid.Executor(place) @@ -315,7 +315,10 @@ def train(cfg): format(cfg.TRAIN.PRETRAINED_MODEL_DIR)) # fetch_list = [avg_loss.name, lr.name, accuracy.name, precision.name, recall.name] - fetch_list = [avg_loss.name, lr.name, seg_loss.name, emb_loss.name, accuracy.name, fp.name, fn.name] + fetch_list = [ + avg_loss.name, lr.name, seg_loss.name, emb_loss.name, accuracy.name, + fp.name, fn.name + ] if args.debug: # Fetch more variable info and use streaming confusion matrix to # calculate IoU results if in debug mode @@ -359,7 +362,7 @@ def train(cfg): print_info("Use multi-thread reader") for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): - py_reader.start() + data_loader.start() while True: try: # If not in debug mode, avoid unnessary log and calculate @@ -385,16 +388,15 @@ def train(cfg): avg_fn /= args.log_steps speed = args.log_steps / timer.elapsed_time() print(( - "epoch={} step={} lr={:.5f} loss={:.4f} seg_loss={:.4f} emb_loss={:.4f} accuracy={:.4} fp={:.4} fn={:.4} step/sec={:.3f} | ETA {}" - ).format(epoch, global_step, lr[0], avg_loss, avg_seg_loss, avg_emb_loss, avg_acc, avg_fp, avg_fn, speed, - calculate_eta(all_step - global_step, speed))) + "epoch={} step={} lr={:.5f} loss={:.4f} seg_loss={:.4f} emb_loss={:.4f} accuracy={:.4} fp={:.4} fn={:.4} step/sec={:.3f} | ETA {}" + ).format(epoch, global_step, lr[0], avg_loss, avg_seg_loss, + avg_emb_loss, avg_acc, avg_fp, avg_fn, speed, + calculate_eta(all_step - global_step, speed))) if args.use_tb: log_writer.add_scalar('Train/loss', avg_loss, global_step) - log_writer.add_scalar('Train/lr', lr[0], - global_step) - log_writer.add_scalar('Train/speed', speed, - global_step) + log_writer.add_scalar('Train/lr', lr[0], global_step) + log_writer.add_scalar('Train/speed', speed, global_step) sys.stdout.flush() avg_loss = 0.0 avg_seg_loss = 0.0 @@ -405,7 +407,7 @@ def train(cfg): timer.restart() except fluid.core.EOFException: - py_reader.reset() + data_loader.reset() break except Exception as e: print(e) @@ -423,10 +425,8 @@ def train(cfg): if args.use_tb: log_writer.add_scalar('Evaluate/accuracy', accuracy, global_step) - log_writer.add_scalar('Evaluate/fp', fp, - global_step) - log_writer.add_scalar('Evaluate/fn', fn, - global_step) + log_writer.add_scalar('Evaluate/fp', fp, global_step) + log_writer.add_scalar('Evaluate/fn', fn, global_step) # Use Tensorboard to visualize results if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None: diff --git a/contrib/RealTimeHumanSeg/README.md b/contrib/RealTimeHumanSeg/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e8693e11e4d66b9a2ee04bf1e03a5704a95fb426 --- /dev/null +++ b/contrib/RealTimeHumanSeg/README.md @@ -0,0 +1,28 @@ +# 实时人像分割预测部署 + +本模型基于飞浆开源的人像分割模型,并做了大量的针对视频的光流追踪优化,提供了完整的支持视频流的实时人像分割解决方案,并提供了高性能的`Python`和`C++`集成部署方案,以满足不同场景的需求。 + + +## 模型下载 + +支持的模型文件如下,请根据应用场景选择合适的模型: +|模型文件 | 说明 | +|---|---| +|[shv75_deeplab_0303_quant](https://paddleseg.bj.bcebos.com/deploy/models/shv75_0303_quant.zip) | 小模型, 适合轻量级计算环境 | 
+|[shv75_deeplab_0303](https://paddleseg.bj.bcebos.com/deploy/models/shv75_deeplab_0303.zip)| 小模型,适合轻量级计算环境 | +|[deeplabv3_xception_humanseg](https://paddleseg.bj.bcebos.com/deploy/models/deeplabv3_xception_humanseg.zip) | 服务端GPU环境 | + +**注意:下载后解压到合适的路径,后续该路径将做为预测参数用于加载模型。** + + +## 预测部署 +- [Python预测部署](./python) +- [C++预测部署](./cpp) + +## 效果预览 + +
+(图:人像分割效果演示)
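在进入下述Python部署文档之前,给出一个驱动 `HumanSeg` 类(见本diff后文 `contrib/RealTimeHumanSeg/python/infer.py`)的最小用法示意;其中模型路径为占位符,需替换为上表模型解压后的目录,mean/scale/eval_size 取自该文件 `main()` 中的取值:

```python
# 最小示意:直接复用 infer.py 中的 HumanSeg 类(模型路径为占位符)
import cv2
from infer import HumanSeg

seg = HumanSeg(
    model_dir='/PATH/TO/INFERENCE/MODEL',  # 含 __model__ 与 __params__ 的目录
    mean=[104.008, 116.669, 122.675],      # 与 infer.py 中 main() 的取值一致
    scale=[1.0, 1.0, 1.0],
    eval_size=(192, 192),
    use_gpu=False)

frame = cv2.imread('input.jpg')            # 任意由OpenCV读取的BGR图像
result = seg.run_predict(frame)            # 预处理 -> 预测 -> 光流后处理融合
cv2.imwrite('result.jpeg', result)         # 与 predict_image() 的保存方式一致
```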
+ diff --git a/contrib/RealTimeHumanSeg/python/README.md b/contrib/RealTimeHumanSeg/python/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1e089c9f5226e2482cd6e8957406c00095706b1b --- /dev/null +++ b/contrib/RealTimeHumanSeg/python/README.md @@ -0,0 +1,61 @@ +# 实时人像分割Python预测部署方案 + +本方案基于Python实现,最小化依赖并把所有模型加载、数据预处理、预测、光流处理等后处理都封装在文件`infer.py`中,用户可以直接使用或集成到自己项目中。 + + +## 前置依赖 +- Windows(7,8,10) / Linux (Ubuntu 16.04) or MacOS 10.1+ +- Paddle 1.6.1+ +- Python 3.0+ + +注意: +1. 仅测试过Paddle1.6 和 1.7, 其它版本不支持 +2. MacOS上不支持GPU预测 +3. Python2上未测试 + +其它未涉及情形,能正常安装`Paddle` 和`OpenCV`通常都能正常使用。 + + +## 安装依赖 +### 1. 安装paddle + +PaddlePaddle的安装, 请按照[官网指引](https://paddlepaddle.org.cn/install/quick)安装合适自己的版本。 + +### 2. 安装其它依赖 + +执行如下命令 + +```shell +pip install -r requirements.txt +``` + +## 运行 + + +1. 输入图片进行分割 +``` +python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --img_path /PATH/TO/INPUT/IMAGE +``` + +预测结果会保存为`result.jpeg`。 +2. 输入视频进行分割 +```shell +python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --video_path /PATH/TO/INPUT/VIDEO +``` + +预测结果会保存在`result.avi`。 + +3. 使用摄像头视频流 +```shell +python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --use_camera 1 +``` +预测结果会通过可视化窗口实时显示。 + +**注意:** + + +`GPU`默认关闭, 如果要使用`GPU`进行加速,则先运行 +``` +export CUDA_VISIBLE_DEVICES=0 +``` +然后在前面的预测命令中增加参数`--use_gpu 1`即可。 diff --git a/contrib/RealTimeHumanSeg/python/infer.py b/contrib/RealTimeHumanSeg/python/infer.py new file mode 100644 index 0000000000000000000000000000000000000000..73df081e4cbda06e20b471b2eae60a2ba037e49a --- /dev/null +++ b/contrib/RealTimeHumanSeg/python/infer.py @@ -0,0 +1,345 @@ +# coding: utf8 +# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""实时人像分割Python预测部署""" + +import os +import argparse +import numpy as np +import cv2 + +import paddle.fluid as fluid + + +def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow): + """计算光流跟踪匹配点和光流图 + 输入参数: + pre_gray: 上一帧灰度图 + cur_gray: 当前帧灰度图 + prev_cfd: 上一帧光流图 + dl_weights: 融合权重图 + disflow: 光流数据结构 + 返回值: + is_track: 光流点跟踪二值图,即是否具有光流点匹配 + track_cfd: 光流跟踪图 + """ + check_thres = 8 + hgt, wdh = pre_gray.shape[:2] + track_cfd = np.zeros_like(prev_cfd) + is_track = np.zeros_like(pre_gray) + # 计算前向光流 + flow_fw = disflow.calc(pre_gray, cur_gray, None) + # 计算后向光流 + flow_bw = disflow.calc(cur_gray, pre_gray, None) + get_round = lambda data: (int)(data + 0.5) if data >= 0 else (int)(data -0.5) + for row in range(hgt): + for col in range(wdh): + # 计算光流处理后对应点坐标 + # (row, col) -> (cur_x, cur_y) + fxy_fw = flow_fw[row, col] + dx_fw = get_round(fxy_fw[0]) + cur_x = dx_fw + col + dy_fw = get_round(fxy_fw[1]) + cur_y = dy_fw + row + if cur_x < 0 or cur_x >= wdh or cur_y < 0 or cur_y >= hgt: + continue + fxy_bw = flow_bw[cur_y, cur_x] + dx_bw = get_round(fxy_bw[0]) + dy_bw = get_round(fxy_bw[1]) + # 光流移动小于阈值 + lmt = ((dy_fw + dy_bw) * (dy_fw + dy_bw) + (dx_fw + dx_bw) * (dx_fw + dx_bw)) + if lmt >= check_thres: + continue + # 静止点降权 + if abs(dy_fw) <= 0 and abs(dx_fw) <= 0 and abs(dy_bw) <= 0 and abs(dx_bw) <= 0: + dl_weights[cur_y, cur_x] = 0.05 + is_track[cur_y, cur_x] = 1 + track_cfd[cur_y, cur_x] = prev_cfd[row, col] + return track_cfd, is_track, dl_weights + + +def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track): + """光流追踪图和人像分割结构融合 + 输入参数: + track_cfd: 光流追踪图 + dl_cfd: 当前帧分割结果 + dl_weights: 融合权重图 + is_track: 光流点匹配二值图 + 返回值: + cur_cfd: 光流跟踪图和人像分割结果融合图 + """ + cur_cfd = dl_cfd.copy() + idxs = np.where(is_track > 0) + for i in range(len(idxs)): + x, y = idxs[0][i], idxs[1][i] + dl_score = dl_cfd[y, x] + track_score = track_cfd[y, x] + if dl_score > 0.9 or dl_score < 0.1: + if dl_weights[x, y] < 0.1: + cur_cfd[x, y] = 0.3 * dl_score + 0.7 * track_score + else: + cur_cfd[x, y] = 0.4 * dl_score + 0.6 * track_score + else: + cur_cfd[x, y] = dl_weights[x, y] * dl_score + (1 - dl_weights[x, y]) * track_score + return cur_cfd + + +def threshold_mask(img, thresh_bg, thresh_fg): + """设置背景和前景阈值mask + 输入参数: + img : 原始图像, np.uint8 类型. + thresh_bg : 背景阈值百分比,低于该值置为0. + thresh_fg : 前景阈值百分比,超过该值置为1. + 返回值: + dst : 原始图像设置完前景背景阈值mask结果, np.float32 类型. 
+ """ + dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg) + dst[np.where(dst > 1)] = 1 + dst[np.where(dst < 0)] = 0 + return dst.astype(np.float32) + + +def optflow_handle(cur_gray, scoremap, is_init): + """光流优化 + Args: + cur_gray : 当前帧灰度图 + scoremap : 当前帧分割结果 + is_init : 是否第一帧 + Returns: + dst : 光流追踪图和预测结果融合图, 类型为 np.float32 + """ + width, height = scoremap.shape[0], scoremap.shape[1] + disflow = cv2.DISOpticalFlow_create( + cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST) + prev_gray = np.zeros((height, width), np.uint8) + prev_cfd = np.zeros((height, width), np.float32) + cur_cfd = scoremap.copy() + if is_init: + is_init = False + if height <= 64 or width <= 64: + disflow.setFinestScale(1) + elif height <= 160 or width <= 160: + disflow.setFinestScale(2) + else: + disflow.setFinestScale(3) + fusion_cfd = cur_cfd + else: + weights = np.ones((width, height), np.float32) * 0.3 + track_cfd, is_track, weights = human_seg_tracking( + prev_gray, cur_gray, prev_cfd, weights, disflow) + fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track) + fusion_cfd = cv2.GaussianBlur(fusion_cfd, (3, 3), 0) + return fusion_cfd + + +class HumanSeg: + """人像分割类 + 封装了人像分割模型的加载,数据预处理,预测,后处理等 + """ + def __init__(self, model_dir, mean, scale, eval_size, use_gpu=False): + + self.mean = np.array(mean).reshape((3, 1, 1)) + self.scale = np.array(scale).reshape((3, 1, 1)) + self.eval_size = eval_size + self.load_model(model_dir, use_gpu) + + def load_model(self, model_dir, use_gpu): + """加载模型并创建predictor + Args: + model_dir: 预测模型路径, 包含 `__model__` 和 `__params__` + use_gpu: 是否使用GPU加速 + """ + prog_file = os.path.join(model_dir, '__model__') + params_file = os.path.join(model_dir, '__params__') + config = fluid.core.AnalysisConfig(prog_file, params_file) + if use_gpu: + config.enable_use_gpu(100, 0) + config.switch_ir_optim(True) + else: + config.disable_gpu() + config.disable_glog_info() + config.switch_specify_input_names(True) + config.enable_memory_optim() + self.predictor = fluid.core.create_paddle_predictor(config) + + def preprocess(self, image): + """图像预处理 + hwc_rgb 转换为 chw_bgr,并进行归一化 + 输入参数: + image: 原始图像 + 返回值: + 经过预处理后的图片结果 + """ + img_mat = cv2.resize( + image, self.eval_size, interpolation=cv2.INTER_LINEAR) + # HWC -> CHW + img_mat = img_mat.swapaxes(1, 2) + img_mat = img_mat.swapaxes(0, 1) + # Convert to float + img_mat = img_mat[:, :, :].astype('float32') + # img_mat = (img_mat - mean) * scale + img_mat = img_mat - self.mean + img_mat = img_mat * self.scale + img_mat = img_mat[np.newaxis, :, :, :] + return img_mat + + def postprocess(self, image, output_data): + """对预测结果进行后处理 + Args: + image: 原始图,opencv 图片对象 + output_data: Paddle预测结果原始数据 + Returns: + 原图和预测结果融合并做了光流优化的结果图 + """ + scoremap = output_data[0, 1, :, :] + scoremap = (scoremap * 255).astype(np.uint8) + ori_h, ori_w = image.shape[0], image.shape[1] + evl_h, evl_w = self.eval_size[0], self.eval_size[1] + # 光流处理 + cur_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) + cur_gray = cv2.resize(cur_gray, (evl_w, evl_h)) + optflow_map = optflow_handle(cur_gray, scoremap, False) + optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0) + optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8) + optflow_map = cv2.resize(optflow_map, (ori_w, ori_h)) + optflow_map = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2) + bg_im = np.ones_like(optflow_map) * 255 + comb = (optflow_map * image + (1 - optflow_map) * bg_im).astype(np.uint8) + return comb + + def run_predict(self, image): + """运行预测并返回可视化结果图 + 输入参数: + image: 需要预测的原始图, 
opencv图片对象 + 返回值: + 可视化的预测结果图 + """ + im_mat = self.preprocess(image) + im_tensor = fluid.core.PaddleTensor(im_mat.copy().astype('float32')) + output_data = self.predictor.run([im_tensor])[0] + output_data = output_data.as_ndarray() + return self.postprocess(image, output_data) + + +def predict_image(seg, image_path): + """对图片文件进行分割 + 结果保存到`result.jpeg`文件中 + """ + img_mat = cv2.imread(image_path) + img_mat = seg.run_predict(img_mat) + cv2.imwrite('result.jpeg', img_mat) + + +def predict_video(seg, video_path): + """对视频文件进行分割 + 结果保存到`result.avi`文件中 + """ + cap = cv2.VideoCapture(video_path) + if not cap.isOpened(): + print("Error opening video stream or file") + return + width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) + height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) + fps = cap.get(cv2.CAP_PROP_FPS) + # 用于保存预测结果视频 + out = cv2.VideoWriter('result.avi', + cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps, + (width, height)) + # 开始获取视频帧 + while cap.isOpened(): + ret, frame = cap.read() + if ret: + img_mat = seg.run_predict(frame) + out.write(img_mat) + else: + break + cap.release() + out.release() + + +def predict_camera(seg): + """从摄像头获取视频流进行预测 + 视频分割结果实时显示到可视化窗口中 + """ + cap = cv2.VideoCapture(0) + if not cap.isOpened(): + print("Error opening video stream or file") + return + # Start capturing from video + while cap.isOpened(): + ret, frame = cap.read() + if ret: + img_mat = seg.run_predict(frame) + cv2.imshow('HumanSegmentation', img_mat) + if cv2.waitKey(1) & 0xFF == ord('q'): + break + else: + break + cap.release() + + +def main(args): + """预测程序入口 + 完成模型加载, 对视频、摄像头、图片文件等预测过程 + """ + model_dir = args.model_dir + use_gpu = args.use_gpu + + # 加载模型 + mean = [104.008, 116.669, 122.675] + scale = [1.0, 1.0, 1.0] + eval_size = (192, 192) + seg = HumanSeg(model_dir, mean, scale, eval_size, use_gpu) + if args.use_camera: + # 开启摄像头 + predict_camera(seg) + elif args.video_path: + # 使用视频文件作为输入 + predict_video(seg, args.video_path) + elif args.img_path: + # 使用图片文件作为输入 + predict_image(seg, args.img_path) + + +def parse_args(): + """解析命令行参数 + """ + parser = argparse.ArgumentParser('Realtime Human Segmentation') + parser.add_argument('--model_dir', + type=str, + default='', + help='path of human segmentation model') + parser.add_argument('--img_path', + type=str, + default='', + help='path of input image') + parser.add_argument('--video_path', + type=str, + default='', + help='path of input video') + parser.add_argument('--use_camera', + type=bool, + default=False, + help='input video stream from camera') + parser.add_argument('--use_gpu', + type=bool, + default=False, + help='enable gpu') + return parser.parse_args() + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/contrib/RealTimeHumanSeg/python/requirements.txt b/contrib/RealTimeHumanSeg/python/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..953dae0cf5e2036ad093907b30ac9a3a10858d27 --- /dev/null +++ b/contrib/RealTimeHumanSeg/python/requirements.txt @@ -0,0 +1,2 @@ +opencv-python==4.1.2.30 +opencv-contrib-python==4.2.0.32 diff --git a/docs/imgs/fast-scnn.png b/docs/imgs/fast-scnn.png new file mode 100644 index 0000000000000000000000000000000000000000..2b00eb88401b9981a6d59595bbdf4e1f692db934 Binary files /dev/null and b/docs/imgs/fast-scnn.png differ diff --git a/docs/imgs/lovasz-hinge.png b/docs/imgs/lovasz-hinge.png new file mode 100644 index 0000000000000000000000000000000000000000..579b3b7b8fd27acbbde073564e53e2cbdfb3df67 Binary files /dev/null and b/docs/imgs/lovasz-hinge.png 
differ diff --git a/docs/imgs/lovasz-softmax.png b/docs/imgs/lovasz-softmax.png new file mode 100644 index 0000000000000000000000000000000000000000..afb3f2e8e29c1c7925228aa8336b6bb23e1e93ba Binary files /dev/null and b/docs/imgs/lovasz-softmax.png differ diff --git a/docs/lovasz_loss.md b/docs/lovasz_loss.md new file mode 100644 index 0000000000000000000000000000000000000000..1270b3fd0fc677350f8c51a2e7a8ac704af68de5 --- /dev/null +++ b/docs/lovasz_loss.md @@ -0,0 +1,116 @@ +# Lovasz loss +在图像分割任务中,经常出现类别分布不均匀的情况,例如:工业产品的瑕疵检测、道路提取及病变区域提取等。 + +我们可使用lovasz loss解决这个问题。Lovasz loss根据分割目标的类别数量可分为两种:lovasz hinge loss适用于二分类问题,lovasz softmax loss适用于多分类问题。 + + +## Lovasz hinge loss +### 使用方式 + +PaddleSeg通过`cfg.SOLVER.LOSS`参数可以选择训练时的损失函数, +如`cfg.SOLVER.LOSS=['lovasz_hinge_loss','bce_loss']`将指定训练loss为`lovasz hinge loss`与`bce loss`的组合。 + +Lovasz hinge loss有3种使用方式:(1)直接训练使用。(2)与bce loss结合使用。(3)先使用bce loss进行训练,再使用lovasz hinge loss进行finetuning. 第1种方式不一定达到理想效果,推荐使用后两种方式。本文以第2种方式为例。 + +### 使用示例 + +我们以道路提取任务为例应用lovasz hinge loss. +在DeepGlobe比赛的Road Extraction中,训练数据道路占比为:4.5%. 如下为其图片样例:

+(图:DeepGlobe道路提取数据集图片样例)
+可以看出道路在整张图片中的比例很小。 + +#### 实验对比 + +在MiniDeepGlobeRoadExtraction数据集进行了实验对比。 + +* 数据集下载 +我们从DeepGlobe比赛的Road Extraction的训练集中随机抽取了800张图片作为训练集,200张图片作为验证集, +制作了一个小型的道路提取数据集[MiniDeepGlobeRoadExtraction](https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip) + +```shell +python dataset/download_mini_deepglobe_road_extraction.py +``` + +* 预训练模型下载 +```shell +python pretrained_model/download_model.py deeplabv3p_mobilenetv2-1-0_bn_coco +``` +* 配置/数据校验 +```shell +python pdseg/check.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml +``` + +* 训练 +```shell +python pdseg/train.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml --use_gpu --use_mpio SOLVER.LOSS "['lovasz_hinge_loss','bce_loss']" +``` + +* 评估 +```shell +python pdseg/eval.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml --use_gpu --use_mpio SOLVER.LOSS "['lovasz_hinge_loss','bce_loss']" +``` + +* 结果比较 + +lovasz hinge loss + bce loss和softmax loss的对比结果如下图所示。 +

+![](./imgs/lovasz-hinge.png)
+ +图中蓝色曲线为lovasz hinge loss + bce loss,最高mIoU为76.2%,橙色曲线为softmax loss, 最高mIoU为73.44%,相比提升2.76个百分点。 + + + +## Lovasz softmax loss +### 使用方式 + +PaddleSeg通过`cfg.SOLVER.LOSS`参数可以选择训练时的损失函数, +如`cfg.SOLVER.LOSS=['lovasz_softmax_loss','softmax_loss']`将指定训练loss为`lovasz softmax loss`与`softmax loss`的组合。 + +Lovasz softmax loss有3种使用方式:(1)直接训练使用。(2)softmax loss结合使用。(3)先使用softmax loss进行训练,再使用lovasz softmax loss进行finetuning. 第1种方式不一定达到理想效果,推荐使用后两种方式。本文以第2种方式为例。 + +### 使用示例 + +我们以Pascal voc为例应用lovasz softmax loss. + + +#### 实验对比 + +在Pascal voc数据集上与softmax loss进行了实验对比。 + +* 数据集下载 +```shell +python dataset/download_and_convert_voc2012.py +``` + +* 预训练模型下载 +```shell +python pretrained_model/download_model.py deeplabv3p_mobilenetv2-1-0_bn_coco +``` +* 配置/数据校验 +```shell +python pdseg/check.py --cfg ./configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml +``` + +* 训练 +```shell +python pdseg/train.py --cfg ./configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml --use_gpu --use_mpio SOLVER.LOSS "['lovasz_softmax_loss','softmax_loss']" + +``` + +* 评估 +```shell +python pdseg/eval.py --cfg ./configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml --use_gpu --use_mpio SOLVER.LOSS "['lovasz_softmax_loss','softmax_loss']" + +``` + +* 结果比较 + +lovasz softmax loss + softmax loss和softmax loss的对比结果如下图所示。 +

+![](./imgs/lovasz-softmax.png)
+ +图中橙色曲线代表lovasz softmax loss + softmax loss,最高mIoU为64.63%,蓝色曲线代表softmax loss, 最高mIoU为63.55%,相比提升1.08个百分点。 diff --git a/docs/models.md b/docs/models.md index a452aa3639c3901d8f75d1aa4f5f1b7f393ce0b7..c36fff5fc88aef362f3ab4f7175b7d60f579e418 100644 --- a/docs/models.md +++ b/docs/models.md @@ -5,6 +5,7 @@ - [PSPNet](#PSPNet) - [ICNet](#ICNet) - [HRNet](#HRNet) +- [Fast-SCNN](#Fast-SCNN) ## U-Net U-Net [1] 起源于医疗图像分割,整个网络是标准的encoder-decoder网络,特点是参数少,计算快,应用性强,对于一般场景适应度很高。U-Net最早于2015年提出,并在ISBI 2015 Cell Tracking Challenge取得了第一。经过发展,目前有多个变形和应用。 @@ -58,6 +59,14 @@ HRNet在人体姿态估计、语义分割和目标检测领域都取得了显著 ![](./imgs/hrnet.png) +### Fast-SCNN + +Fast-SCNN [7] 是一个面向实时的语义分割网络。在双分支的结构基础上,大量使用了深度可分离卷积和逆残差(inverted-residual)模块,并且使用特征融合构造金字塔池化模块 (Pyramid Pooling Module)来融合上下文信息。这使得Fast-SCNN在保持高效的情况下能学习到丰富的细节信息。 + +整个网络结构如下: + +![](./imgs/fast-scnn.png) + ## 参考文献 [1] [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) @@ -72,3 +81,6 @@ HRNet在人体姿态估计、语义分割和目标检测领域都取得了显著 [6] [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919) +[7] [Fast-SCNN: Fast Semantic Segmentation Network](https://arxiv.org/abs/1902.04502) + + diff --git a/pdseg/eval.py b/pdseg/eval.py index b842431fb895c0985da1de5d5ef65073534a9835..426e52f9287bbdb5e4b2e2a4bb617ee910aeff5e 100644 --- a/pdseg/eval.py +++ b/pdseg/eval.py @@ -92,10 +92,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): for b in data_gen: yield b[0], b[1], b[2] - py_reader, avg_loss, pred, grts, masks = build_model( + data_loader, avg_loss, pred, grts, masks = build_model( test_prog, startup_prog, phase=ModelPhase.EVAL) - py_reader.decorate_sample_generator( + data_loader.set_sample_generator( data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE) # Get device environment @@ -128,7 +128,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs): all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1 timer = Timer() timer.start() - py_reader.start() + data_loader.start() while True: try: step += 1 diff --git a/pdseg/lovasz_losses.py b/pdseg/lovasz_losses.py new file mode 100755 index 0000000000000000000000000000000000000000..5f2283583972945ca3d70233a684833ed8902d6c --- /dev/null +++ b/pdseg/lovasz_losses.py @@ -0,0 +1,205 @@ +# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Lovasz-Softmax and Jaccard hinge loss in PaddlePaddle""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import paddle.fluid as fluid +import numpy as np + + +def _cumsum(x): + y = np.array(x) + return np.cumsum(y, axis=0) + + +def create_tmp_var(name, dtype, shape): + return fluid.default_main_program().current_block().create_var( + name=name, dtype=dtype, shape=shape) + + +def lovasz_grad(gt_sorted): + """ + Computes gradient of the Lovasz extension w.r.t sorted errors + See Alg. 
1 in paper + """ + gt_sorted = fluid.layers.squeeze(gt_sorted, axes=[1]) + gts = fluid.layers.reduce_sum(gt_sorted) + len_gt = fluid.layers.shape(gt_sorted) + + # Acceleration is achieved by reducing the number of calls to cumsum. + # This calculation method is equivalent to that of the original paper. + var_one = fluid.layers.fill_constant(shape=[1], value=1, dtype='int32') + range_ = fluid.layers.range(1, len_gt + var_one, 1, 'int32') + tmp_var = create_tmp_var( + name='tmp_var', dtype=gt_sorted.dtype, shape=gt_sorted.shape) + cumsum_ = fluid.layers.py_func(func=_cumsum, x=gt_sorted, out=tmp_var) + intersection = gts - cumsum_ + union = intersection + range_ + + jaccard = 1.0 - intersection / union + jaccard0 = fluid.layers.slice(jaccard, axes=[0], starts=[0], ends=[1]) + jaccard1 = fluid.layers.slice(jaccard, axes=[0], starts=[1], ends=[len_gt]) + jaccard2 = fluid.layers.slice(jaccard, axes=[0], starts=[0], ends=[-1]) + jaccard = fluid.layers.concat([jaccard0, jaccard1 - jaccard2], axis=0) + jaccard = fluid.layers.unsqueeze(jaccard, axes=[1]) + return jaccard + + +def lovasz_hinge(logits, labels, ignore=None): + """ + Binary Lovasz hinge loss + logits: [N, C, H, W] Tensor, logits at each pixel (between -\infty and +\infty) + labels: [N, 1, H, W] Tensor, binary ground truth masks (0 or 1) + ignore: [N, 1, H, W] Tensor. Void class labels, ignore pixels which value=0 + """ + loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore)) + return loss + + +def lovasz_hinge_flat(logits, labels): + """ + Binary Lovasz hinge loss + logits: [P] Tensor, logits at each prediction (between -\infty and +\infty) + labels: [P] Tensor, binary ground truth labels (0 or 1) + """ + shape = fluid.layers.shape(logits) + y = fluid.layers.zeros_like(shape[0]) + + out_var = fluid.layers.create_tensor("float32") + with fluid.layers.control_flow.Switch() as switch: + with switch.case(fluid.layers.equal(shape[0], y)): + loss = fluid.layers.reduce_sum(logits) * 0. + fluid.layers.assign(input=loss, output=out_var) + with switch.case(fluid.layers.greater_than(shape[0], y)): + labelsf = fluid.layers.cast(labels, logits.dtype) + signs = labelsf * 2 - 1. + signs.stop_gradient = True + errors = 1.0 - fluid.layers.elementwise_mul(logits, signs) + errors_sorted, perm = fluid.layers.argsort( + errors, axis=0, descending=True) + errors_sorted.stop_gradient = False + gt_sorted = fluid.layers.gather(labelsf, perm) + + grad = lovasz_grad(gt_sorted) + grad.stop_gradient = True + loss = fluid.layers.reduce_sum( + fluid.layers.relu(errors_sorted) * grad) + fluid.layers.assign(input=loss, output=out_var) + return out_var + + +def flatten_binary_scores(scores, labels, ignore=None): + """ + Flattens predictions in the batch (binary case) + Remove labels according to 'ignore' + """ + scores = fluid.layers.reshape(scores, [-1, 1]) + labels = fluid.layers.reshape(labels, [-1, 1]) + labels.stop_gradient = True + if ignore is None: + return scores, labels + ignore = fluid.layers.cast(ignore, 'int32') + ignore_mask = fluid.layers.reshape(ignore, (-1, 1)) + indexs = fluid.layers.where(ignore_mask == 1) + indexs.stop_gradient = True + vscores = fluid.layers.gather(scores, indexs[:, 0]) + vlabels = fluid.layers.gather(labels, indexs[:, 0]) + return vscores, vlabels + + +def lovasz_softmax(probas, labels, classes='present', ignore=None): + """ + Multi-class Lovasz-Softmax loss + probas: [N, C, H, W] Tensor, class probabilities at each prediction (between 0 and 1). 
+ labels: [N, 1, H, W] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + ignore: [N, 1, H, W] Tensor. Void class labels, ignore pixels which value=0 + """ + vprobas, vlabels = flatten_probas(probas, labels, ignore) + loss = lovasz_softmax_flat(vprobas, vlabels, classes=classes) + return loss + + +def lovasz_softmax_flat(probas, labels, classes='present'): + """ + Multi-class Lovasz-Softmax loss + probas: [P, C] Tensor, class probabilities at each prediction (between 0 and 1) + labels: [P] Tensor, ground truth labels (between 0 and C - 1) + classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average. + """ + C = probas.shape[1] + losses = [] + present = [] + classes_to_sum = list(range(C)) if classes in ['all', 'present' + ] else classes + for c in classes_to_sum: + fg = fluid.layers.cast(labels == c, probas.dtype) + fg.stop_gradient = True + if classes == 'present': + present.append( + fluid.layers.cast(fluid.layers.reduce_sum(fg) > 0, "int64")) + if C == 1: + if len(classes_to_sum) > 1: + raise ValueError('Sigmoid output possible only with 1 class') + class_pred = probas[:, 0] + else: + class_pred = probas[:, c] + errors = fluid.layers.abs(fg - class_pred) + errors_sorted, perm = fluid.layers.argsort( + errors, axis=0, descending=True) + errors_sorted.stop_gradient = False + + fg_sorted = fluid.layers.gather(fg, perm) + fg_sorted.stop_gradient = True + + grad = lovasz_grad(fg_sorted) + grad.stop_gradient = True + loss = fluid.layers.reduce_sum(errors_sorted * grad) + + losses.append(loss) + + if len(classes_to_sum) == 1: + return losses[0] + + losses_tensor = fluid.layers.stack(losses) + if classes == 'present': + present_tensor = fluid.layers.stack(present) + index = fluid.layers.where(present_tensor == 1) + index.stop_gradient = True + losses_tensor = fluid.layers.gather(losses_tensor, index[:, 0]) + loss = fluid.layers.mean(losses_tensor) + return loss + + +def flatten_probas(probas, labels, ignore=None): + """ + Flattens predictions in the batch + """ + if len(probas.shape) == 3: + probas = fluid.layers.unsqueeze(probas, axis=[1]) + C = probas.shape[1] + probas = fluid.layers.transpose(probas, [0, 2, 3, 1]) + probas = fluid.layers.reshape(probas, [-1, C]) + labels = fluid.layers.reshape(labels, [-1, 1]) + if ignore is None: + return probas, labels + ignore = fluid.layers.cast(ignore, 'int32') + ignore_mask = fluid.layers.reshape(ignore, [-1, 1]) + indexs = fluid.layers.where(ignore_mask == 1) + indexs.stop_gradient = True + vprobas = fluid.layers.gather(probas, indexs[:, 0]) + vlabels = fluid.layers.gather(labels, indexs[:, 0]) + return vprobas, vlabels diff --git a/pdseg/models/backbone/mobilenet_v2.py b/pdseg/models/backbone/mobilenet_v2.py index ba9c2e7812cb2e19cc839e84b201e45c357cc692..740284b319bd836d9c27682c1c22d556d2b98aa1 100644 --- a/pdseg/models/backbone/mobilenet_v2.py +++ b/pdseg/models/backbone/mobilenet_v2.py @@ -308,8 +308,8 @@ def MobileNetV2_scale(): if __name__ == '__main__': - image_shape = [3, 224, 224] - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + image_shape = [-1, 3, 224, 224] + image = fluid.data(name='image', shape=image_shape, dtype='float32') model = MobileNetV2_x1_0() logit, decode_ends = model.net(image) #print("logit:", logit.shape) diff --git a/pdseg/models/backbone/xception.py b/pdseg/models/backbone/xception.py index 
09b356973bdafc21952eaa9c88ab43c861677d57..5c07f240625744356c5df4644342cff6c81af687 100644 --- a/pdseg/models/backbone/xception.py +++ b/pdseg/models/backbone/xception.py @@ -311,7 +311,7 @@ def xception_71(): if __name__ == '__main__': - image_shape = [3, 224, 224] - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + image_shape = [-1, 3, 224, 224] + image = fluid.data(name='image', shape=image_shape, dtype='float32') model = xception_65() logit = model.net(image) diff --git a/pdseg/models/model_builder.py b/pdseg/models/model_builder.py index 668d69e44aeb91cc7705a79f092730ae6a1fdb09..864602247f8edad016c82d10b42c3585df7a2490 100644 --- a/pdseg/models/model_builder.py +++ b/pdseg/models/model_builder.py @@ -24,6 +24,8 @@ from utils.config import cfg from loss import multi_softmax_with_loss from loss import multi_dice_loss from loss import multi_bce_loss +from lovasz_losses import lovasz_hinge +from lovasz_losses import lovasz_softmax from models.modeling import deeplab, unet, icnet, pspnet, hrnet, fast_scnn @@ -166,8 +168,8 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): width = cfg.EVAL_CROP_SIZE[0] height = cfg.EVAL_CROP_SIZE[1] - image_shape = [cfg.DATASET.DATA_DIM, height, width] - grt_shape = [1, height, width] + image_shape = [-1, cfg.DATASET.DATA_DIM, height, width] + grt_shape = [-1, 1, height, width] class_num = cfg.DATASET.NUM_CLASSES with fluid.program_guard(main_prog, start_prog): @@ -175,25 +177,26 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): # 在导出模型的时候,增加图像标准化预处理,减小预测部署时图像的处理流程 # 预测部署时只须对输入图像增加batch_size维度即可 if ModelPhase.is_predict(phase): - origin_image = fluid.layers.data( - name='image', - shape=[-1, -1, -1, cfg.DATASET.DATA_DIM], - dtype='float32', - append_batch_size=False) - image, valid_shape, origin_shape = export_preprocess( - origin_image) + if cfg.SLIM.PREPROCESS: + image = fluid.data( + name='image', shape=image_shape, dtype='float32') + else: + origin_image = fluid.data( + name='image', + shape=[-1, -1, -1, cfg.DATASET.DATA_DIM], + dtype='float32') + image, valid_shape, origin_shape = export_preprocess( + origin_image) else: - image = fluid.layers.data( + image = fluid.data( name='image', shape=image_shape, dtype='float32') - label = fluid.layers.data( - name='label', shape=grt_shape, dtype='int32') - mask = fluid.layers.data( - name='mask', shape=grt_shape, dtype='int32') + label = fluid.data(name='label', shape=grt_shape, dtype='int32') + mask = fluid.data(name='mask', shape=grt_shape, dtype='int32') - # use PyReader when doing traning and evaluation + # use DataLoader when doing traning and evaluation if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase): - py_reader = fluid.io.PyReader( + data_loader = fluid.io.DataLoader.from_generator( feed_list=[image, label, mask], capacity=cfg.DATALOADER.BUF_SIZE, iterable=False, @@ -203,19 +206,22 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): if not isinstance(loss_type, list): loss_type = list(loss_type) - # dice_loss或bce_loss只适用两类分割中 - if class_num > 2 and (("dice_loss" in loss_type) or + # lovasz_hinge_loss或dice_loss或bce_loss只适用两类分割中 + if class_num > 2 and (("lovasz_hinge_loss" in loss_type) or + ("dice_loss" in loss_type) or ("bce_loss" in loss_type)): raise Exception( - "dice loss and bce loss is only applicable to binary classfication" + "lovasz hinge loss, dice loss and bce loss are only applicable to binary classfication." 
) - # 在两类分割情况下,当loss函数选择dice_loss或bce_loss的时候,最后logit输出通道数设置为1 - if ("dice_loss" in loss_type) or ("bce_loss" in loss_type): + # 在两类分割情况下,当loss函数选择lovasz_hinge_loss或dice_loss或bce_loss的时候,最后logit输出通道数设置为1 + if ("dice_loss" in loss_type) or ("bce_loss" in loss_type) or ( + "lovasz_hinge_loss" in loss_type): class_num = 1 - if "softmax_loss" in loss_type: + if ("softmax_loss" in loss_type) or ( + "lovasz_softmax_loss" in loss_type): raise Exception( - "softmax loss can not combine with dice loss or bce loss" + "softmax loss or lovasz softmax loss can not combine with bce loss or dice loss or lovasz hinge loss." ) logits = seg_model(image, class_num) @@ -227,7 +233,8 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): if "softmax_loss" in loss_type: weight = cfg.SOLVER.CROSS_ENTROPY_WEIGHT avg_loss_list.append( - multi_softmax_with_loss(logits, label, mask, class_num, weight)) + multi_softmax_with_loss(logits, label, mask, class_num, + weight)) loss_valid = True valid_loss.append("softmax_loss") if "dice_loss" in loss_type: @@ -238,11 +245,22 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): avg_loss_list.append(multi_bce_loss(logits, label, mask)) loss_valid = True valid_loss.append("bce_loss") + if "lovasz_hinge_loss" in loss_type: + avg_loss_list.append( + lovasz_hinge(logits, label, ignore=mask)) + loss_valid = True + valid_loss.append("lovasz_hinge_loss") + if "lovasz_softmax_loss" in loss_type: + probas = fluid.layers.softmax(logits, axis=1) + avg_loss_list.append( + lovasz_softmax(probas, label, ignore=mask)) + loss_valid = True + valid_loss.append("lovasz_softmax_loss") if not loss_valid: raise Exception( "SOLVER.LOSS: {} is set wrong. it should " - "include one of (softmax_loss, bce_loss, dice_loss) at least" - " example: ['softmax_loss'], ['dice_loss'], ['bce_loss', 'dice_loss']" + "include one of (softmax_loss, bce_loss, dice_loss, lovasz_hinge_loss, lovasz_softmax_loss) at least" + " example: ['softmax_loss'], ['dice_loss'], ['bce_loss', 'dice_loss'], ['lovasz_hinge_loss','bce_loss'], ['lovasz_softmax_loss','softmax_loss']" .format(cfg.SOLVER.LOSS)) invalid_loss = [x for x in loss_type if x not in valid_loss] @@ -253,7 +271,9 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): avg_loss = 0 for i in range(0, len(avg_loss_list)): - avg_loss += avg_loss_list[i] + loss_name = valid_loss[i].upper() + loss_weight = eval('cfg.SOLVER.LOSS_WEIGHT.' 
+ loss_name) + avg_loss += loss_weight * avg_loss_list[i] #get pred result in original size if isinstance(logits, tuple): @@ -266,22 +286,26 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): # return image input and logit output for inference graph prune if ModelPhase.is_predict(phase): - # 两类分割中,使用dice_loss或bce_loss返回的logit为单通道,进行到两通道的变换 + # 两类分割中,使用lovasz_hinge_loss或dice_loss或bce_loss返回的logit为单通道,进行到两通道的变换 if class_num == 1: logit = sigmoid_to_softmax(logit) else: logit = softmax(logit) # 获取有效部分 - logit = fluid.layers.slice( - logit, axes=[2, 3], starts=[0, 0], ends=valid_shape) - - logit = fluid.layers.resize_bilinear( - logit, - out_shape=origin_shape, - align_corners=False, - align_mode=0) - logit = fluid.layers.argmax(logit, axis=1) + if cfg.SLIM.PREPROCESS: + return image, logit + + else: + logit = fluid.layers.slice( + logit, axes=[2, 3], starts=[0, 0], ends=valid_shape) + + logit = fluid.layers.resize_bilinear( + logit, + out_shape=origin_shape, + align_corners=False, + align_mode=0) + logit = fluid.layers.argmax(logit, axis=1) return origin_image, logit if class_num == 1: @@ -300,12 +324,12 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN): return pred, logit if ModelPhase.is_eval(phase): - return py_reader, avg_loss, pred, label, mask + return data_loader, avg_loss, pred, label, mask if ModelPhase.is_train(phase): optimizer = solver.Solver(main_prog, start_prog) decayed_lr = optimizer.optimise(avg_loss) - return py_reader, avg_loss, decayed_lr, pred, label, mask + return data_loader, avg_loss, decayed_lr, pred, label, mask def to_int(string, dest="I"): diff --git a/pdseg/models/modeling/hrnet.py b/pdseg/models/modeling/hrnet.py index 741834e157105b233403772f2672ed60aafc488f..4b95461a1adf5c96cd8737540b3509944ea7900e 100644 --- a/pdseg/models/modeling/hrnet.py +++ b/pdseg/models/modeling/hrnet.py @@ -202,7 +202,7 @@ def hrnet(input, num_classes): return logit if __name__ == '__main__': - image_shape = [3, 769, 769] - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + image_shape = [-1, 3, 769, 769] + image = fluid.data(name='image', shape=image_shape, dtype='float32') logit = hrnet(image, 4) print("logit:", logit.shape) diff --git a/pdseg/models/modeling/icnet.py b/pdseg/models/modeling/icnet.py index 354468c9efbe3d3429845f7605927556ef3b505a..f6364ff282b3feea2b2bebe796d34434f59b18ca 100644 --- a/pdseg/models/modeling/icnet.py +++ b/pdseg/models/modeling/icnet.py @@ -191,7 +191,7 @@ def icnet(input, num_classes): if __name__ == '__main__': - image_shape = [3, 320, 320] - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + image_shape = [-1, 3, 320, 320] + image = fluid.data(name='image', shape=image_shape, dtype='float32') logit = icnet(image, 4) print("logit:", logit.shape) diff --git a/pdseg/models/modeling/unet.py b/pdseg/models/modeling/unet.py index 0f613a83b7f468cf1573b4f57f36e8e51fefb7ac..215737aa658bcd4046ecaa77bb04fd11a329be24 100644 --- a/pdseg/models/modeling/unet.py +++ b/pdseg/models/modeling/unet.py @@ -129,7 +129,7 @@ def unet(input, num_classes): if __name__ == '__main__': - image_shape = [3, 320, 320] - image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') + image_shape = [-1, 3, 320, 320] + image = fluid.data(name='image', shape=image_shape, dtype='float32') logit = unet(image, 4) print("logit:", logit.shape) diff --git a/pdseg/train.py b/pdseg/train.py index 8254f1655c97c09204d2e4a64e2404907270fcfc..9e30c0f2050bd4987d84675985a86922e1c993c3 100644 --- 
a/pdseg/train.py +++ b/pdseg/train.py @@ -103,7 +103,7 @@ def parse_args(): help='If set True, enable continuous evaluation job.' 'This flag is only used for internal test.', action='store_true') - + # NOTE: This for benchmark parser.add_argument( '--is_profiler', @@ -114,7 +114,7 @@ def parse_args(): '--profiler_path', help='the profiler output file path.(used for benchmark)', default='./seg.profiler', - type=str) + type=str) return parser.parse_args() @@ -265,9 +265,9 @@ def train(cfg): batch_size_per_dev = cfg.BATCH_SIZE // dev_count print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) - py_reader, avg_loss, lr, pred, grts, masks = build_model( + data_loader, avg_loss, lr, pred, grts, masks = build_model( train_prog, startup_prog, phase=ModelPhase.TRAIN) - py_reader.decorate_sample_generator( + data_loader.set_sample_generator( data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) exe = fluid.Executor(place) @@ -386,7 +386,7 @@ def train(cfg): print_info("Use multi-thread reader") for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): - py_reader.start() + data_loader.start() while True: try: if args.debug: @@ -454,16 +454,16 @@ def train(cfg): sys.stdout.flush() avg_loss = 0.0 timer.restart() - + # NOTE : used for benchmark, profiler tools - if args.is_profiler and epoch == 1 and global_step == args.log_steps: + if args.is_profiler and epoch == 1 and global_step == args.log_steps: profiler.start_profiler("All") elif args.is_profiler and epoch == 1 and global_step == args.log_steps + 5: profiler.stop_profiler("total", args.profiler_path) return except fluid.core.EOFException: - py_reader.reset() + data_loader.reset() break except Exception as e: print(e) diff --git a/pdseg/utils/config.py b/pdseg/utils/config.py index c3d84216752838a388fd2cda1946949d77960fb9..141b17ce24df1f78310975ef236290011ebffb56 100644 --- a/pdseg/utils/config.py +++ b/pdseg/utils/config.py @@ -155,10 +155,16 @@ cfg.SOLVER.BEGIN_EPOCH = 1 cfg.SOLVER.NUM_EPOCHS = 30 # loss的选择,支持softmax_loss, bce_loss, dice_loss cfg.SOLVER.LOSS = ["softmax_loss"] -# 是否开启warmup学习策略 -cfg.SOLVER.LR_WARMUP = False +# loss的权重,用于多loss组合加权使用,仅对SOLVER.LOSS内包含的loss生效 +cfg.SOLVER.LOSS_WEIGHT.SOFTMAX_LOSS = 1 +cfg.SOLVER.LOSS_WEIGHT.DICE_LOSS = 1 +cfg.SOLVER.LOSS_WEIGHT.BCE_LOSS = 1 +cfg.SOLVER.LOSS_WEIGHT.LOVASZ_HINGE_LOSS = 1 +cfg.SOLVER.LOSS_WEIGHT.LOVASZ_SOFTMAX_LOSS = 1 +# 是否开启warmup学习策略 +cfg.SOLVER.LR_WARMUP = False # warmup的迭代次数 -cfg.SOLVER.LR_WARMUP_STEPS = 2000 +cfg.SOLVER.LR_WARMUP_STEPS = 2000 # cross entropy weight, 默认为None,如果设置为'dynamic',会根据每个batch中各个类别的数目, # 动态调整类别权重。 # 也可以设置一个静态权重(list的方式),比如有3类,每个类别权重可以设置为[0.1, 2.0, 0.9] @@ -228,7 +234,6 @@ cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS = [40, 80, 160] cfg.MODEL.HRNET.STAGE4.NUM_MODULES = 3 cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS = [40, 80, 160, 320] - ########################## 预测部署模型配置 ################################### # 预测保存的模型名称 cfg.FREEZE.MODEL_FILENAME = '__model__' @@ -251,4 +256,4 @@ cfg.SLIM.NAS_SPACE_NAME = "" cfg.SLIM.PRUNE_PARAMS = '' cfg.SLIM.PRUNE_RATIOS = [] - +cfg.SLIM.PREPROCESS = False diff --git a/slim/distillation/README.md b/slim/distillation/README.md index 2bd772a1001e11efa89324315fa32d44032ade05..d7af90beb3a7fd4fa6bb3775d45b0fd6aadc0133 100644 --- a/slim/distillation/README.md +++ b/slim/distillation/README.md @@ -89,7 +89,6 @@ python -m paddle.distributed.launch ./slim/distillation/train_distill.py \ --log_steps 10 --cfg ./slim/distillation/cityscape.yaml \ --teacher_cfg ./slim/distillation/cityscape_teacher.yaml \ --use_gpu \ ---use_mpio 
diff --git a/slim/distillation/README.md b/slim/distillation/README.md
index 2bd772a1001e11efa89324315fa32d44032ade05..d7af90beb3a7fd4fa6bb3775d45b0fd6aadc0133 100644
--- a/slim/distillation/README.md
+++ b/slim/distillation/README.md
@@ -89,7 +89,6 @@
 python -m paddle.distributed.launch ./slim/distillation/train_distill.py \
 --log_steps 10 --cfg ./slim/distillation/cityscape.yaml \
 --teacher_cfg ./slim/distillation/cityscape_teacher.yaml \
 --use_gpu \
---use_mpio \
 --do_eval
 ```
diff --git a/slim/distillation/model_builder.py b/slim/distillation/model_builder.py
index f903b8dd2b635fa10070dcc3da488be66746d539..90f53992478e27c94fc8a7817931d3a46e0bb108 100644
--- a/slim/distillation/model_builder.py
+++ b/slim/distillation/model_builder.py
@@ -156,7 +156,10 @@ def export_preprocess(image):
 
     return image, valid_shape, origin_shape
 
-def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwargs):
+def build_model(main_prog=None,
+                start_prog=None,
+                phase=ModelPhase.TRAIN,
+                **kwargs):
     if not ModelPhase.is_valid_phase(phase):
         raise ValueError("ModelPhase {} is not valid!".format(phase))
 
@@ -167,8 +170,8 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
     width = cfg.EVAL_CROP_SIZE[0]
     height = cfg.EVAL_CROP_SIZE[1]
 
-    image_shape = [cfg.DATASET.DATA_DIM, height, width]
-    grt_shape = [1, height, width]
+    image_shape = [-1, cfg.DATASET.DATA_DIM, height, width]
+    grt_shape = [-1, 1, height, width]
     class_num = cfg.DATASET.NUM_CLASSES
 
     #with fluid.program_guard(main_prog, start_prog):
@@ -176,36 +179,30 @@
     # When exporting the model, add image normalization preprocessing so that
     # less image handling is needed at inference deployment time
     # At deployment time, only a batch_size dimension has to be added to the input image
     if cfg.SLIM.KNOWLEDGE_DISTILL_IS_TEACHER:
-        image = main_prog.global_block()._clone_variable(kwargs['image'],
-                                                         force_persistable=False)
-        label = main_prog.global_block()._clone_variable(kwargs['label'],
-                                                         force_persistable=False)
-        mask = main_prog.global_block()._clone_variable(kwargs['mask'],
-                                                        force_persistable=False)
+        image = main_prog.global_block()._clone_variable(
+            kwargs['image'], force_persistable=False)
+        label = main_prog.global_block()._clone_variable(
+            kwargs['label'], force_persistable=False)
+        mask = main_prog.global_block()._clone_variable(
+            kwargs['mask'], force_persistable=False)
     else:
         if ModelPhase.is_predict(phase):
-            origin_image = fluid.layers.data(
+            origin_image = fluid.data(
                 name='image',
                 shape=[-1, -1, -1, cfg.DATASET.DATA_DIM],
-                dtype='float32',
-                append_batch_size=False)
-            image, valid_shape, origin_shape = export_preprocess(
-                origin_image)
+                dtype='float32')
+            image, valid_shape, origin_shape = export_preprocess(origin_image)
         else:
-            image = fluid.layers.data(
-                name='image', shape=image_shape, dtype='float32')
-            label = fluid.layers.data(
-                name='label', shape=grt_shape, dtype='int32')
-            mask = fluid.layers.data(
-                name='mask', shape=grt_shape, dtype='int32')
+            image = fluid.data(name='image', shape=image_shape, dtype='float32')
+            label = fluid.data(name='label', shape=grt_shape, dtype='int32')
+            mask = fluid.data(name='mask', shape=grt_shape, dtype='int32')
 
-
-    # use PyReader when doing traning and evaluation
+    # use DataLoader.from_generator when doing training and evaluation
     if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase):
-        py_reader = None
+        data_loader = None
         if not cfg.SLIM.KNOWLEDGE_DISTILL_IS_TEACHER:
-            py_reader = fluid.io.PyReader(
+            data_loader = fluid.io.DataLoader.from_generator(
                 feed_list=[image, label, mask],
                 capacity=cfg.DATALOADER.BUF_SIZE,
                 iterable=False,
@@ -219,16 +216,14 @@
             if class_num > 2 and (("dice_loss" in loss_type) or ("bce_loss" in loss_type)):
                 raise Exception(
-                    "dice loss and bce loss is only applicable to binary classfication"
-                )
+                    "dice loss and bce loss is only applicable to binary classification")
             # For binary segmentation, when dice_loss or bce_loss is selected,
             # the number of output logit channels is set to 1
             if ("dice_loss" in loss_type) or ("bce_loss" in loss_type):
                 class_num = 1
                 if "softmax_loss" in loss_type:
                     raise Exception(
-                        "softmax loss can not combine with dice loss or bce loss"
-                    )
+                        "softmax loss cannot be combined with dice loss or bce loss")
 
         logits = seg_model(image, class_num)
 
         # Compute the corresponding losses according to the selected loss functions
@@ -289,10 +284,7 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
                 logit, axes=[2, 3], starts=[0, 0], ends=valid_shape)
             logit = fluid.layers.resize_bilinear(
-                logit,
-                out_shape=origin_shape,
-                align_corners=False,
-                align_mode=0)
+                logit, out_shape=origin_shape, align_corners=False, align_mode=0)
             logit = fluid.layers.argmax(logit, axis=1)
             return origin_image, logit
@@ -312,7 +304,7 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
             return pred, logit
 
         if ModelPhase.is_eval(phase):
-            return py_reader, avg_loss, pred, label, mask
+            return data_loader, avg_loss, pred, label, mask
 
         if ModelPhase.is_train(phase):
             decayed_lr = None
@@ -321,7 +313,7 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
             decayed_lr = optimizer.optimise(avg_loss)
         # optimizer = solver.Solver(main_prog, start_prog)
         # decayed_lr = optimizer.optimise(avg_loss)
-        return py_reader, avg_loss, decayed_lr, pred, label, mask, image
+        return data_loader, avg_loss, decayed_lr, pred, label, mask, image
 
 
 def to_int(string, dest="I"):
diff --git a/slim/distillation/train_distill.py b/slim/distillation/train_distill.py
index c1e23253ffcde9eea034bd7f67906ca9e534d2e2..e354107f173eea203d9df3f01f93fae62f41eabc 100644
--- a/slim/distillation/train_distill.py
+++ b/slim/distillation/train_distill.py
@@ -48,6 +48,7 @@ from utils import dist_utils
 import solver
 from paddleslim.dist.single_distiller import merge, l2_loss
 
+
 def parse_args():
     parser = argparse.ArgumentParser(description='PaddleSeg training')
     parser.add_argument(
@@ -260,8 +261,9 @@ def train(cfg):
     batch_size_per_dev = cfg.BATCH_SIZE // dev_count
     print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
 
-    py_reader, loss, lr, pred, grts, masks, image = build_model(phase=ModelPhase.TRAIN)
-    py_reader.decorate_sample_generator(
+    data_loader, loss, lr, pred, grts, masks, image = build_model(
+        phase=ModelPhase.TRAIN)
+    data_loader.set_sample_generator(
         data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
 
     exe = fluid.Executor(place)
@@ -274,8 +276,12 @@ def train(cfg):
     with fluid.program_guard(teacher_program, teacher_startup_program):
         with fluid.unique_name.guard():
             _, teacher_loss, _, _, _, _, _ = build_model(
-                teacher_program, teacher_startup_program, phase=ModelPhase.TRAIN, image=image,
-                label=grts, mask=masks)
+                teacher_program,
+                teacher_startup_program,
+                phase=ModelPhase.TRAIN,
+                image=image,
+                label=grts,
+                mask=masks)
 
     exe.run(teacher_startup_program)
 
@@ -293,7 +299,9 @@ def train(cfg):
         'mask': 'mask',
     }
     merge(teacher_program, fluid.default_main_program(), data_name_map, place)
-    distill_pairs = [['teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0']]
+    distill_pairs = [[
+        'teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0'
+    ]]
 
     def distill(pairs, weight):
         """
@@ -322,7 +330,8 @@ def train(cfg):
     build_strategy.fuse_all_optimizer_ops = False
     build_strategy.fuse_elewise_add_act_ops = True
     if cfg.NUM_TRAINERS > 1 and args.use_gpu:
-        dist_utils.prepare_for_multi_process(exe, build_strategy, fluid.default_main_program())
+        dist_utils.prepare_for_multi_process(exe, build_strategy,
+                                             fluid.default_main_program())
         exec_strategy.num_threads = 1
 
     if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
@@ -334,10 +343,11 @@ def train(cfg):
             print_info(
                 "Sync BatchNorm strategy will not be effective if GPU device"
                 " count <= 1")
-    compiled_train_prog = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(
-        loss_name=all_loss.name,
-        exec_strategy=exec_strategy,
-        build_strategy=build_strategy)
+    compiled_train_prog = fluid.CompiledProgram(
+        fluid.default_main_program()).with_data_parallel(
+            loss_name=all_loss.name,
+            exec_strategy=exec_strategy,
+            build_strategy=build_strategy)
 
     # Resume training
     begin_epoch = cfg.SOLVER.BEGIN_EPOCH
@@ -387,7 +397,9 @@ def train(cfg):
             format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
 
     #fetch_list = [avg_loss.name, lr.name]
-    fetch_list = [loss.name, 'teacher_' + teacher_loss.name, distill_loss.name, lr.name]
+    fetch_list = [
+        loss.name, 'teacher_' + teacher_loss.name, distill_loss.name, lr.name
+    ]
 
     if args.debug:
         # Fetch more variable info and use streaming confusion matrix to
@@ -431,7 +443,7 @@ def train(cfg):
         print_info("Use multi-thread reader")
 
     for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
-        py_reader.start()
+        data_loader.start()
         while True:
             try:
                 if args.debug:
@@ -491,7 +503,8 @@ def train(cfg):
                         speed = args.log_steps / timer.elapsed_time()
                         print((
                             "epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}"
-                        ).format(epoch, global_step, lr[0], avg_loss, avg_t_loss, avg_d_loss, speed,
+                        ).format(epoch, global_step, lr[0], avg_loss,
+                                 avg_t_loss, avg_d_loss, speed,
                                  calculate_eta(all_step - global_step, speed)))
                         if args.use_tb:
                             log_writer.add_scalar('Train/loss', avg_loss,
@@ -507,7 +520,7 @@ def train(cfg):
                         timer.restart()
 
             except fluid.core.EOFException:
-                py_reader.reset()
+                data_loader.reset()
                 break
             except Exception as e:
                 print(e)
diff --git a/slim/nas/README.md b/slim/nas/README.md
index cddfc5a82f07ab0b3f2e2acad6a4c0f7b2ed650c..31e8f93f608002504cdaeaed940e4b41c138e00c 100644
--- a/slim/nas/README.md
+++ b/slim/nas/README.md
@@ -46,7 +46,7 @@ SLIM:
 ## Training and evaluation
 Run the following command to train and evaluate at the same time
 ```shell
-CUDA_VISIBLE_DEVICES=0 python -u ./slim/nas/train_nas.py --log_steps 10 --cfg configs/deeplabv3p_mobilenetv2_cityscapes.yaml --use_gpu --use_mpio \
+CUDA_VISIBLE_DEVICES=0 python -u ./slim/nas/train_nas.py --log_steps 10 --cfg configs/deeplabv3p_mobilenetv2_cityscapes.yaml --use_gpu \
 SLIM.NAS_PORT 23333 \
 SLIM.NAS_ADDRESS "" \
 SLIM.NAS_SEARCH_STEPS 2 \
diff --git a/slim/nas/eval_nas.py b/slim/nas/eval_nas.py
index 08f75f5d8ee8d6afbcf9b038e4f8dcf0237a5b56..7f8663dffafb49d7c372f3eeaf0d3ed074f7ce9b 100644
--- a/slim/nas/eval_nas.py
+++ b/slim/nas/eval_nas.py
@@ -45,6 +45,7 @@ from metrics import ConfusionMatrix
 
 from mobilenetv2_search_space import MobileNetV2SpaceSeg
 
+
 def parse_args():
     parser = argparse.ArgumentParser(description='PaddleSeg model evalution')
     parser.add_argument(
@@ -98,10 +99,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
             for b in data_gen:
                 yield b[0], b[1], b[2]
 
-    py_reader, avg_loss, pred, grts, masks = build_model(
+    data_loader, avg_loss, pred, grts, masks = build_model(
         test_prog, startup_prog, phase=ModelPhase.EVAL, arch=kwargs['arch'])
 
-    py_reader.decorate_sample_generator(
+    data_loader.set_sample_generator(
         data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
 
     # Get device environment
@@ -134,7 +135,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
     all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
     timer = Timer()
     timer.start()
-    py_reader.start()
+    data_loader.start()
     while True:
         try:
             step += 1
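The eval_nas.py hunk above and the nas/model_builder.py hunks below repeat the two input-pipeline migrations applied throughout this patch. The first is `fluid.layers.data` → `fluid.data`, where the batch dimension becomes an explicit `-1` in the shape instead of being appended implicitly. A minimal sketch of just that change, with illustrative shapes:

```python
import paddle.fluid as fluid

# Old API: the batch dimension is appended implicitly by default
# image = fluid.layers.data(name='image', shape=[3, 769, 769], dtype='float32')

# New API: the full shape is spelled out, -1 marking the variable batch dimension
image = fluid.data(name='image', shape=[-1, 3, 769, 769], dtype='float32')
label = fluid.data(name='label', shape=[-1, 1, 769, 769], dtype='int32')
```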
diff --git a/slim/nas/model_builder.py b/slim/nas/model_builder.py
index 3dfbacb0cd41a14bb81c6f6c82b81479fb1c30c8..27a14fa77970cad18e017dc825f1708ceb2c9c75 100644
--- a/slim/nas/model_builder.py
+++ b/slim/nas/model_builder.py
@@ -74,9 +74,7 @@ def seg_model(image, class_num, arch):
     if model_name == 'deeplabv3p':
         logits = deeplab.deeplabv3p_nas(image, class_num, arch)
     else:
-        raise Exception(
-            "unknow model name, only support deeplabv3p"
-        )
+        raise Exception("unknown model name, only deeplabv3p is supported")
     return logits
@@ -156,8 +154,8 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN, arch=None):
     width = cfg.EVAL_CROP_SIZE[0]
     height = cfg.EVAL_CROP_SIZE[1]
 
-    image_shape = [cfg.DATASET.DATA_DIM, height, width]
-    grt_shape = [1, height, width]
+    image_shape = [-1, cfg.DATASET.DATA_DIM, height, width]
+    grt_shape = [-1, 1, height, width]
     class_num = cfg.DATASET.NUM_CLASSES
 
     with fluid.program_guard(main_prog, start_prog):
@@ -165,25 +163,22 @@
         # When exporting the model, add image normalization preprocessing so that
         # less image handling is needed at inference deployment time
        # At deployment time, only a batch_size dimension has to be added to the input image
        if ModelPhase.is_predict(phase):
-            origin_image = fluid.layers.data(
+            origin_image = fluid.data(
                 name='image',
                 shape=[-1, -1, -1, cfg.DATASET.DATA_DIM],
-                dtype='float32',
-                append_batch_size=False)
+                dtype='float32')
             image, valid_shape, origin_shape = export_preprocess(
                 origin_image)
         else:
-            image = fluid.layers.data(
+            image = fluid.data(
                 name='image', shape=image_shape, dtype='float32')
-            label = fluid.layers.data(
-                name='label', shape=grt_shape, dtype='int32')
-            mask = fluid.layers.data(
-                name='mask', shape=grt_shape, dtype='int32')
+            label = fluid.data(name='label', shape=grt_shape, dtype='int32')
+            mask = fluid.data(name='mask', shape=grt_shape, dtype='int32')
 
-        # use PyReader when doing traning and evaluation
+        # use DataLoader.from_generator when doing training and evaluation
         if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase):
-            py_reader = fluid.io.PyReader(
+            data_loader = fluid.io.DataLoader.from_generator(
                 feed_list=[image, label, mask],
                 capacity=cfg.DATALOADER.BUF_SIZE,
                 iterable=False,
@@ -217,7 +212,8 @@
             if "softmax_loss" in loss_type:
                 weight = cfg.SOLVER.CROSS_ENTROPY_WEIGHT
                 avg_loss_list.append(
-                    multi_softmax_with_loss(logits, label, mask, class_num, weight))
+                    multi_softmax_with_loss(logits, label, mask, class_num,
+                                            weight))
                 loss_valid = True
                 valid_loss.append("softmax_loss")
             if "dice_loss" in loss_type:
@@ -290,12 +286,12 @@
             return pred, logit
 
         if ModelPhase.is_eval(phase):
-            return py_reader, avg_loss, pred, label, mask
+            return data_loader, avg_loss, pred, label, mask
 
         if ModelPhase.is_train(phase):
             optimizer = solver.Solver(main_prog, start_prog)
             decayed_lr = optimizer.optimise(avg_loss)
-            return py_reader, avg_loss, decayed_lr, pred, label, mask
+            return data_loader, avg_loss, decayed_lr, pred, label, mask
 
 
 def to_int(string, dest="I"):
diff --git a/slim/nas/train_nas.py b/slim/nas/train_nas.py
index 7822657fa264d053360199d5691098ae85fcd12c..6ab4d899dc2406275daf3fecd3738fb4b3b82c49 100644
--- a/slim/nas/train_nas.py
+++ b/slim/nas/train_nas.py
@@ -54,6 +54,7 @@ from paddleslim.analysis import flops
 from paddleslim.nas.sa_nas import SANAS
 from paddleslim.nas import search_space
 
+
 def parse_args():
     parser = argparse.ArgumentParser(description='PaddleSeg training')
     parser.add_argument(
@@ -269,21 +270,24 @@ def train(cfg):
     port = cfg.SLIM.NAS_PORT
     server_address = (cfg.SLIM.NAS_ADDRESS, port)
-    sa_nas = SANAS(config, server_addr=server_address, search_steps=cfg.SLIM.NAS_SEARCH_STEPS,
-                   is_server=cfg.SLIM.NAS_IS_SERVER)
+    sa_nas = SANAS(
+        config,
+        server_addr=server_address,
+        search_steps=cfg.SLIM.NAS_SEARCH_STEPS,
+        is_server=cfg.SLIM.NAS_IS_SERVER)
 
     for step in range(cfg.SLIM.NAS_SEARCH_STEPS):
         arch = sa_nas.next_archs()[0]
         start_prog = fluid.Program()
         train_prog = fluid.Program()
 
-        py_reader, avg_loss, lr, pred, grts, masks = build_model(
+        data_loader, avg_loss, lr, pred, grts, masks = build_model(
             train_prog, start_prog, arch=arch, phase=ModelPhase.TRAIN)
 
         cur_flops = flops(train_prog)
         print('current step:', step, 'flops:', cur_flops)
 
-        py_reader.decorate_sample_generator(
+        data_loader.set_sample_generator(
             data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
 
         exe = fluid.Executor(place)
@@ -297,7 +301,8 @@ def train(cfg):
         build_strategy = fluid.BuildStrategy()
 
         if cfg.NUM_TRAINERS > 1 and args.use_gpu:
-            dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
+            dist_utils.prepare_for_multi_process(exe, build_strategy,
+                                                 train_prog)
             exec_strategy.num_threads = 1
 
         if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
@@ -309,10 +314,11 @@ def train(cfg):
                 print_info(
                     "Sync BatchNorm strategy will not be effective if GPU device"
                     " count <= 1")
-        compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
-            loss_name=avg_loss.name,
-            exec_strategy=exec_strategy,
-            build_strategy=build_strategy)
+        compiled_train_prog = fluid.CompiledProgram(
+            train_prog).with_data_parallel(
+                loss_name=avg_loss.name,
+                exec_strategy=exec_strategy,
+                build_strategy=build_strategy)
 
         # Resume training
         begin_epoch = cfg.SOLVER.BEGIN_EPOCH
@@ -353,13 +359,14 @@ def train(cfg):
                 print_info(
                     "Parameter[{}] don't exist or shape does not match current network, skip"
                     " to load it.".format(var.name))
-            print_info("{}/{} pretrained parameters loaded successfully!".format(
-                len(load_vars),
-                len(load_vars) + len(load_fail_vars)))
+            print_info(
+                "{}/{} pretrained parameters loaded successfully!".format(
+                    len(load_vars),
+                    len(load_vars) + len(load_fail_vars)))
         else:
             print_info(
                 'Pretrained model dir {} not exists, training from scratch...'.
-                format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
+                format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
 
         fetch_list = [avg_loss.name, lr.name]
 
@@ -374,8 +381,8 @@ def train(cfg):
         timer.start()
         if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
             raise ValueError(
-                ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
-                    begin_epoch, cfg.SOLVER.NUM_EPOCHS))
+                ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]"
+                 ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))
 
         if args.use_mpio:
             print_info("Use multiprocess reader")
@@ -384,7 +391,7 @@ def train(cfg):
         best_miou = 0.0
 
         for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
-            py_reader.start()
+            data_loader.start()
             while True:
                 try:
                     loss, lr = exe.run(
@@ -398,21 +405,22 @@ def train(cfg):
                         avg_loss /= args.log_steps
                         speed = args.log_steps / timer.elapsed_time()
                         print((
-                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
-                        ).format(epoch, global_step, lr[0], avg_loss, speed,
-                                 calculate_eta(all_step - global_step, speed)))
+                            "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
+                        ).format(epoch, global_step, lr[0], avg_loss, speed,
+                                 calculate_eta(all_step - global_step, speed)))
                         sys.stdout.flush()
                         avg_loss = 0.0
                         timer.restart()
 
                 except fluid.core.EOFException:
-                    py_reader.reset()
+                    data_loader.reset()
                     break
                 except Exception as e:
                     print(e)
 
             if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH:
-                ckpt_dir = save_checkpoint(exe, train_prog, '{}_tmp'.format(port))
+                ckpt_dir = save_checkpoint(exe, train_prog,
+                                           '{}_tmp'.format(port))
                 _, mean_iou, _, mean_acc = evaluate(
                     cfg=cfg,
                     arch=arch,
@@ -420,7 +428,8 @@ def train(cfg):
                     use_gpu=args.use_gpu,
                     use_mpio=args.use_mpio)
                 if best_miou < mean_iou:
-                    print('search step {}, epoch {} best iou {}'.format(step, epoch, mean_iou))
+                    print('search step {}, epoch {} best iou {}'.format(
+                        step, epoch, mean_iou))
                     best_miou = mean_iou
 
         sa_nas.reward(float(best_miou))
diff --git a/slim/prune/README.md b/slim/prune/README.md
index b6a45238938567a845b44ff768db6982bfeab55c..25505606e3fcc8c8e7c6beba68cdb8d39c1c56b1 100644
--- a/slim/prune/README.md
+++ b/slim/prune/README.md
@@ -46,7 +46,7 @@ SLIM.PRUNE_RATIOS '[0.1,0.1,0.1]'
 
 ```shell
 CUDA_VISIBLE_DEVICES=0
-python -u ./slim/prune/eval_prune.py --cfg configs/cityscape_fast_scnn.yaml --use_gpu --use_mpio \
+python -u ./slim/prune/eval_prune.py --cfg configs/cityscape_fast_scnn.yaml --use_gpu \
 TEST.TEST_MODEL your_trained_model \
 ```
diff --git a/slim/prune/eval_prune.py b/slim/prune/eval_prune.py
index b8275d03475b8fea67d73682b54a38172fbc25e2..3bfb4f4cf2772da0e6122ec6f6660d90a23c71e2 100644
--- a/slim/prune/eval_prune.py
+++ b/slim/prune/eval_prune.py
@@ -45,6 +45,7 @@ from metrics import ConfusionMatrix
 
 from paddleslim.prune import load_model
 
+
 def parse_args():
     parser = argparse.ArgumentParser(description='PaddleSeg model evalution')
     parser.add_argument(
@@ -98,10 +99,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
             for b in data_gen:
                 yield b[0], b[1], b[2]
 
-    py_reader, avg_loss, pred, grts, masks = build_model(
+    data_loader, avg_loss, pred, grts, masks = build_model(
         test_prog, startup_prog, phase=ModelPhase.EVAL)
 
-    py_reader.decorate_sample_generator(
+    data_loader.set_sample_generator(
         data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
 
     # Get device environment
@@ -134,7 +135,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
     all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
     timer = Timer()
     timer.start()
-    py_reader.start()
+    data_loader.start()
     while True:
         try:
             step += 1
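Every `py_reader` → `data_loader` hunk in this patch follows the same pattern: `fluid.io.PyReader` becomes `fluid.io.DataLoader.from_generator`, `decorate_sample_generator` becomes `set_sample_generator`, and the `start()`/`reset()` protocol stays the same. A self-contained sketch of that pattern, with toy shapes and random data standing in for PaddleSeg's real reader:

```python
import numpy as np
import paddle.fluid as fluid

image = fluid.data(name='image', shape=[-1, 3, 769, 769], dtype='float32')
label = fluid.data(name='label', shape=[-1, 1, 769, 769], dtype='int32')

# Replaces fluid.io.PyReader(feed_list=..., capacity=..., iterable=False, ...)
data_loader = fluid.io.DataLoader.from_generator(
    feed_list=[image, label], capacity=8, iterable=False, use_double_buffer=True)

def sample_generator():
    # Toy samples; the real code wires in PaddleSeg's dataset reader here
    for _ in range(16):
        yield (np.random.rand(3, 769, 769).astype('float32'),
               np.random.randint(0, 2, (1, 769, 769)).astype('int32'))

# Replaces py_reader.decorate_sample_generator(...)
data_loader.set_sample_generator(sample_generator, batch_size=4, drop_last=True)

exe = fluid.Executor(fluid.CPUPlace())
exe.run(fluid.default_startup_program())

data_loader.start()  # same start/EOF/reset protocol as the old PyReader
while True:
    try:
        exe.run(fetch_list=[])
    except fluid.core.EOFException:
        data_loader.reset()
        break
```

Because the `iterable=False` protocol is identical, the surrounding training and evaluation loops in this patch only needed renames.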
diff --git a/slim/prune/train_prune.py b/slim/prune/train_prune.py
index 06e1658f1a3f721842fbe780820103aceac87a16..05c599e3327728ee1ef5e3f2dea359ab9dab5834 100644
--- a/slim/prune/train_prune.py
+++ b/slim/prune/train_prune.py
@@ -50,6 +50,7 @@ from utils import dist_utils
 from paddleslim.prune import Pruner, save_model
 from paddleslim.analysis import flops
 
+
 def parse_args():
     parser = argparse.ArgumentParser(description='PaddleSeg training')
     parser.add_argument(
@@ -181,10 +182,12 @@ def load_checkpoint(exe, program):
 
     return begin_epoch
 
+
 def print_info(*msg):
     if cfg.TRAINER_ID == 0:
         print(*msg)
 
+
 def train(cfg):
     startup_prog = fluid.Program()
     train_prog = fluid.Program()
@@ -236,9 +239,9 @@ def train(cfg):
     batch_size_per_dev = cfg.BATCH_SIZE // dev_count
     print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
 
-    py_reader, avg_loss, lr, pred, grts, masks = build_model(
+    data_loader, avg_loss, lr, pred, grts, masks = build_model(
         train_prog, startup_prog, phase=ModelPhase.TRAIN)
-    py_reader.decorate_sample_generator(
+    data_loader.set_sample_generator(
         data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
 
     exe = fluid.Executor(place)
@@ -261,8 +264,9 @@ def train(cfg):
         print_info("Sync BatchNorm strategy is effective.")
         build_strategy.sync_batch_norm = True
     else:
-        print_info("Sync BatchNorm strategy will not be effective if GPU device"
-                   " count <= 1")
+        print_info(
+            "Sync BatchNorm strategy will not be effective if GPU device"
+            " count <= 1")
 
     pruned_params = cfg.SLIM.PRUNE_PARAMS.strip().split(',')
     pruned_ratios = cfg.SLIM.PRUNE_RATIOS
@@ -311,14 +315,16 @@ def train(cfg):
         for var in load_vars:
             print_info("Parameter[{}] loaded sucessfully!".format(var.name))
         for var in load_fail_vars:
-            print_info("Parameter[{}] don't exist or shape does not match current network, skip"
-                       " to load it.".format(var.name))
+            print_info(
+                "Parameter[{}] doesn't exist or shape does not match current network, skip"
+                " to load it.".format(var.name))
         print_info("{}/{} pretrained parameters loaded successfully!".format(
             len(load_vars),
             len(load_vars) + len(load_fail_vars)))
     else:
-        print_info('Pretrained model dir {} not exists, training from scratch...'.
-                   format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
+        print_info(
+            'Pretrained model dir {} does not exist, training from scratch...'.
+            format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
 
     fetch_list = [avg_loss.name, lr.name]
     if args.debug:
@@ -371,7 +377,7 @@ def train(cfg):
         print_info("Use multi-thread reader")
 
     for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
-        py_reader.start()
+        data_loader.start()
         while True:
             try:
                 if args.debug:
@@ -441,7 +447,7 @@ def train(cfg):
                         timer.restart()
 
             except fluid.core.EOFException:
-                py_reader.reset()
+                data_loader.reset()
                 break
             except Exception as e:
                 print(e)
@@ -477,6 +483,7 @@ def train(cfg):
     if cfg.TRAINER_ID == 0:
         save_prune_checkpoint(exe, train_prog, 'final')
 
+
 def main(args):
     if args.cfg_file is not None:
         cfg.update_from_file(args.cfg_file)
diff --git a/slim/quantization/README.md b/slim/quantization/README.md
index 9af04033b3a9af84d4b1fdf081f156be6f8dc0c2..28a74e01a64b81d2dc9e6d022a6fab40ed3866f9 100644
--- a/slim/quantization/README.md
+++ b/slim/quantization/README.md
@@ -133,7 +133,20 @@
 TRAIN.SYNC_BATCH_NORM False \
 BATCH_SIZE 16 \
 ```
 
+## Exporting the model
+Export the model with the script [slim/quantization/export_model.py](./export_model.py).
+Export command:
+
+Run the following from the root directory of the segmentation library
+```
+python -u ./slim/quantization/export_model.py --not_quant_pattern last_conv --cfg configs/deeplabv3p_mobilenetv2_cityscapes.yaml \
+TEST.TEST_MODEL "./snapshots/mobilenetv2_quant/best_model" \
+MODEL.DEEPLAB.ENCODER_WITH_ASPP False \
+MODEL.DEEPLAB.ENABLE_DECODER False \
+TRAIN.SYNC_BATCH_NORM False \
+SLIM.PREPROCESS True \
+```
 
 ## Quantization results
diff --git a/slim/quantization/eval_quant.py b/slim/quantization/eval_quant.py
index f40021df10ac5cabee789ca4de04b7489b37f182..fdf6f3ce18444f85c157a301334aabfdf47869e7 100644
--- a/slim/quantization/eval_quant.py
+++ b/slim/quantization/eval_quant.py
@@ -105,10 +105,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
             for b in data_gen:
                 yield b[0], b[1], b[2]
 
-    py_reader, avg_loss, pred, grts, masks = build_model(
+    data_loader, avg_loss, pred, grts, masks = build_model(
         test_prog, startup_prog, phase=ModelPhase.EVAL)
 
-    py_reader.decorate_sample_generator(
+    data_loader.set_sample_generator(
         data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
 
     # Get device environment
@@ -152,7 +152,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
     all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
     timer = Timer()
     timer.start()
-    py_reader.start()
+    data_loader.start()
     while True:
         try:
             step += 1
diff --git a/slim/quantization/export_model.py b/slim/quantization/export_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..3891254bd18470c2af94cb74fe8e06e942b86cb5
--- /dev/null
+++ b/slim/quantization/export_model.py
@@ -0,0 +1,149 @@
+# coding: utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+import time
+import pprint
+import cv2
+import argparse
+import numpy as np
+import paddle.fluid as fluid
+
+from utils.config import cfg
+from models.model_builder import build_model
+from models.model_builder import ModelPhase
+from paddleslim.quant import quant_aware, convert
+
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        description='PaddleSeg Inference Model Exporter')
+    parser.add_argument(
+        '--cfg',
+        dest='cfg_file',
+        help='Config file for training (and optionally testing)',
+        default=None,
+        type=str)
+    parser.add_argument(
+        "--not_quant_pattern",
+        nargs='+',
+        type=str,
+        help=
+        "Layers whose name_scope contains a string in not_quant_pattern will not be quantized"
+    )
+    parser.add_argument(
+        'opts',
+        help='See utils/config.py for all options',
+        default=None,
+        nargs=argparse.REMAINDER)
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit(1)
+    return parser.parse_args()
+
+
+def export_inference_config():
+    deploy_cfg = '''DEPLOY:
+        USE_GPU : 1
+        MODEL_PATH : "%s"
+        MODEL_FILENAME : "%s"
+        PARAMS_FILENAME : "%s"
+        EVAL_CROP_SIZE : %s
+        MEAN : %s
+        STD : %s
+        IMAGE_TYPE : "%s"
+        NUM_CLASSES : %d
+        CHANNELS : %d
+        PRE_PROCESSOR : "SegPreProcessor"
+        PREDICTOR_MODE : "ANALYSIS"
+        BATCH_SIZE : 1
+    ''' % (cfg.FREEZE.SAVE_DIR, cfg.FREEZE.MODEL_FILENAME,
+           cfg.FREEZE.PARAMS_FILENAME, cfg.EVAL_CROP_SIZE, cfg.MEAN, cfg.STD,
+           cfg.DATASET.IMAGE_TYPE, cfg.DATASET.NUM_CLASSES, len(cfg.STD))
+    if not os.path.exists(cfg.FREEZE.SAVE_DIR):
+        os.mkdir(cfg.FREEZE.SAVE_DIR)
+    yaml_path = os.path.join(cfg.FREEZE.SAVE_DIR, 'deploy.yaml')
+    with open(yaml_path, "w") as fp:
+        fp.write(deploy_cfg)
+    return yaml_path
+
+
+def export_inference_model(args):
+    """
+    Export PaddlePaddle inference model for prediction deployment and serving.
+ """ + print("Exporting inference model...") + startup_prog = fluid.Program() + infer_prog = fluid.Program() + image, logit_out = build_model( + infer_prog, startup_prog, phase=ModelPhase.PREDICT) + + # Use CPU for exporting inference model instead of GPU + place = fluid.CPUPlace() + exe = fluid.Executor(place) + exe.run(startup_prog) + infer_prog = infer_prog.clone(for_test=True) + not_quant_pattern_list = [] + if args.not_quant_pattern is not None: + not_quant_pattern_list = args.not_quant_pattern + + config = { + 'weight_quantize_type': 'channel_wise_abs_max', + 'activation_quantize_type': 'moving_average_abs_max', + 'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'], + 'not_quant_pattern': not_quant_pattern_list + } + + infer_prog = quant_aware(infer_prog, place, config, for_test=True) + if os.path.exists(cfg.TEST.TEST_MODEL): + fluid.io.load_persistables( + exe, cfg.TEST.TEST_MODEL, main_program=infer_prog) + else: + print("TEST.TEST_MODEL diretory is empty!") + exit(-1) + + infer_prog = convert(infer_prog, place, config) + + fluid.io.save_inference_model( + cfg.FREEZE.SAVE_DIR, + feeded_var_names=[image.name], + target_vars=[logit_out], + executor=exe, + main_program=infer_prog, + model_filename=cfg.FREEZE.MODEL_FILENAME, + params_filename=cfg.FREEZE.PARAMS_FILENAME) + print("Inference model exported!") + print("Exporting inference model config...") + deploy_cfg_path = export_inference_config() + print("Inference model saved : [%s]" % (deploy_cfg_path)) + + +def main(): + args = parse_args() + if args.cfg_file is not None: + cfg.update_from_file(args.cfg_file) + if args.opts: + cfg.update_from_list(args.opts) + cfg.check_and_infer() + print(pprint.pformat(cfg)) + export_inference_model(args) + + +if __name__ == '__main__': + main() diff --git a/slim/quantization/train_quant.py b/slim/quantization/train_quant.py index 6a29dccdbaeda54b06c11299fb37e979cec6e401..1034b7234d73f21c41085d5a04d74069b04de7ca 100644 --- a/slim/quantization/train_quant.py +++ b/slim/quantization/train_quant.py @@ -157,9 +157,9 @@ def train_quant(cfg): batch_size_per_dev = cfg.BATCH_SIZE // dev_count print_info("batch_size_per_dev: {}".format(batch_size_per_dev)) - py_reader, avg_loss, lr, pred, grts, masks = build_model( + data_loader, avg_loss, lr, pred, grts, masks = build_model( train_prog, startup_prog, phase=ModelPhase.TRAIN) - py_reader.decorate_sample_generator( + data_loader.set_sample_generator( data_generator, batch_size=batch_size_per_dev, drop_last=drop_last) exe = fluid.Executor(place) @@ -274,7 +274,7 @@ def train_quant(cfg): print_info("Use multi-thread reader") for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1): - py_reader.start() + data_loader.start() while True: try: if args.debug: @@ -326,7 +326,7 @@ def train_quant(cfg): timer.restart() except fluid.core.EOFException: - py_reader.reset() + data_loader.reset() break except Exception as e: print(e) diff --git a/turtorial/finetune_fast_scnn.md b/turtorial/finetune_fast_scnn.md index 188a51edf9d138bb6832849c9ab2ad8afbcd3cd4..31541b796849277085104abf1df13284e264fae8 100644 --- a/turtorial/finetune_fast_scnn.md +++ b/turtorial/finetune_fast_scnn.md @@ -114,6 +114,6 @@ python pdseg/eval.py --use_gpu --cfg ./configs/fast_scnn_pet.yaml | ICNet/bn |(1024, 2048) |8.76ms| 0.6831 | | Fast-SCNN/bn | (1024, 2048) |6.28ms| 0.6964 | -上述测试环境为v100. 测试使用paddle的推理接口[zero_copy](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/python_infer_cn.html#id8)的方式,模型输出是类别,即argmax后的值。 +上述测试环境为v100. 
+The tests above were run on a V100. They use Paddle's [zero_copy](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/python_infer_cn.html#id8) inference interface; the model output is the class label, i.e. the value after argmax.
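For context on the zero_copy measurement referenced in the last hunk, the following is a minimal sketch of zero-copy inference against an exported model using Paddle 1.x's AnalysisConfig; the model directory, file names, and input size are illustrative assumptions, not values taken from this patch:

```python
import numpy as np
from paddle.fluid.core import AnalysisConfig, create_paddle_predictor

# Assumed export layout; adjust to your FREEZE settings
config = AnalysisConfig('freeze_model/__model__', 'freeze_model/__params__')
config.enable_use_gpu(100, 0)            # workspace in MB, GPU id
config.switch_use_feed_fetch_ops(False)  # required for zero-copy tensors

predictor = create_paddle_predictor(config)

input_name = predictor.get_input_names()[0]
input_tensor = predictor.get_input_tensor(input_name)
img = np.random.rand(1, 3, 1024, 2048).astype('float32')  # dummy NCHW batch
input_tensor.copy_from_cpu(img)

predictor.zero_copy_run()

output_name = predictor.get_output_names()[0]
pred = predictor.get_output_tensor(output_name).copy_to_cpu()
# With a PaddleSeg predict program, pred holds class ids (argmax already applied)
```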