diff --git a/README.md b/README.md
index 0fefe77bb4c078a15c4f02a6d189a240cf304de6..23558c5a9e7c099c1599da7bfa1d25ac910fe87a 100644
--- a/README.md
+++ b/README.md
@@ -6,7 +6,7 @@
## Introduction
-PaddleSeg is a semantic segmentation library developed on [PaddlePaddle](https://www.paddlepaddle.org.cn), covering mainstream segmentation models such as DeepLabv3+, U-Net, ICNet, PSPNet and HRNet. Through a unified configuration, it helps users complete the full image segmentation workflow from training to deployment more conveniently.
+PaddleSeg is a semantic segmentation library developed on [PaddlePaddle](https://www.paddlepaddle.org.cn), covering mainstream segmentation models such as DeepLabv3+, U-Net, ICNet, PSPNet, HRNet and Fast-SCNN. Through a unified configuration, it helps users complete the full image segmentation workflow from training to deployment more conveniently.
@@ -33,7 +33,7 @@ PaddleSeg is a semantic segmentation library developed on [PaddlePaddle](https://www.paddlepaddle.org.cn)
- **Modular design**
-Supports five mainstream segmentation networks (U-Net, DeepLabv3+, ICNet, PSPNet and HRNet). Combined with pretrained models and adjustable backbone networks, they satisfy different performance and accuracy requirements; choosing different loss functions such as Dice Loss and BCE Loss can strengthen segmentation accuracy for small objects and imbalanced samples.
+Supports six mainstream segmentation networks (U-Net, DeepLabv3+, ICNet, PSPNet, HRNet and Fast-SCNN). Combined with pretrained models and adjustable backbone networks, they satisfy different performance and accuracy requirements; choosing different loss functions such as Dice Loss and BCE Loss can strengthen segmentation accuracy for small objects and imbalanced samples.
- **High performance**
@@ -163,6 +163,14 @@ A: 请将PaddlePaddle升级至1.5.2版本或以上。
WeChat official account      Official technical QQ group
## Changelog
+* 2020.02.25
+
+ **`v0.4.0`**
+  * Added Fast-SCNN, a segmentation network suited to real-time scenarios that requires no pretrained backbone, with one Cityscapes-based [pretrained model](./docs/model_zoo.md).
+  * Added the LaneNet lane detection network, with one [pretrained model](https://github.com/PaddlePaddle/PaddleSeg/tree/release/v0.4.0/contrib/LaneNet#%E4%B8%83-%E5%8F%AF%E8%A7%86%E5%8C%96).
+  * Added PaddleSlim-based compression strategies for the segmentation library ([quantization](./slim/quantization/README.md), [distillation](./slim/distillation/README.md), [pruning](./slim/prune/README.md), [NAS](./slim/nas/README.md)).
+
+
* 2019.12.15
**`v0.3.0`**
diff --git a/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml b/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..0dbeddd2142f4b504a130c4273be4ef77cfadef5
--- /dev/null
+++ b/configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml
@@ -0,0 +1,49 @@
+EVAL_CROP_SIZE: (1025, 1025) # (width, height), for unpadding, rangescaling and stepscaling
+TRAIN_CROP_SIZE: (769, 769) # (width, height), for unpadding, rangescaling and stepscaling
+AUG:
+    AUG_METHOD: "stepscaling" # choose one of: unpadding, rangescaling, stepscaling
+ FIX_RESIZE_SIZE: (640, 640) # (width, height), for unpadding
+ INF_RESIZE_VALUE: 500 # for rangescaling
+ MAX_RESIZE_VALUE: 600 # for rangescaling
+ MIN_RESIZE_VALUE: 400 # for rangescaling
+ MAX_SCALE_FACTOR: 2.0 # for stepscaling
+ MIN_SCALE_FACTOR: 0.5 # for stepscaling
+ SCALE_STEP_SIZE: 0.25 # for stepscaling
+ FLIP: True
+BATCH_SIZE: 24
+DATASET:
+ DATA_DIR: "./dataset/MiniDeepGlobeRoadExtraction/"
+    IMAGE_TYPE: "rgb" # choose rgb or rgba
+ NUM_CLASSES: 2
+ TEST_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt"
+ TRAIN_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/train.txt"
+ VAL_FILE_LIST: "dataset/MiniDeepGlobeRoadExtraction/val.txt"
+ IGNORE_INDEX: 255
+ SEPARATOR: '|'
+FREEZE:
+ MODEL_FILENAME: "model"
+ PARAMS_FILENAME: "params"
+ SAVE_DIR: "freeze_model"
+MODEL:
+ DEFAULT_NORM_TYPE: "bn"
+ MODEL_NAME: "deeplabv3p"
+ DEEPLAB:
+ BACKBONE: "mobilenetv2"
+ DEPTH_MULTIPLIER: 1.0
+ ENCODER_WITH_ASPP: False
+ ENABLE_DECODER: False
+TEST:
+ TEST_MODEL: "./saved_model/lovasz_hinge_deeplabv3p_mobilenet_road/final"
+TRAIN:
+ MODEL_SAVE_DIR: "./saved_model/lovasz_hinge_deeplabv3p_mobilenet_road/"
+ PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_coco/"
+ SNAPSHOT_EPOCH: 10
+SOLVER:
+ LR: 0.1
+ LR_POLICY: "poly"
+ OPTIMIZER: "sgd"
+ NUM_EPOCHS: 300
+ LOSS: ["lovasz_hinge_loss","bce_loss"]
+ LOSS_WEIGHT:
+ LOVASZ_HINGE_LOSS: 0.5
+ BCE_LOSS: 0.5
diff --git a/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml b/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml
new file mode 100755
index 0000000000000000000000000000000000000000..0c083ba509b1d6c83e852ab261da887b3c451370
--- /dev/null
+++ b/configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml
@@ -0,0 +1,48 @@
+TRAIN_CROP_SIZE: (500, 500) # (width, height), for unpadding, rangescaling and stepscaling; crop size during training
+EVAL_CROP_SIZE: (500, 500) # (width, height), for unpadding, rangescaling and stepscaling; crop size during evaluation
+AUG:
+    AUG_METHOD: "stepscaling" # choose one of: unpadding, rangescaling, stepscaling
+ FIX_RESIZE_SIZE: (500, 500) # (width, height), for unpadding
+
+ INF_RESIZE_VALUE: 500 # for rangescaling
+ MAX_RESIZE_VALUE: 600 # for rangescaling
+ MIN_RESIZE_VALUE: 400 # for rangescaling
+
+ MAX_SCALE_FACTOR: 1.25 # for stepscaling
+ MIN_SCALE_FACTOR: 0.75 # for stepscaling
+ SCALE_STEP_SIZE: 0.05 # for stepscaling
+ MIRROR: True
+ FLIP: True
+BATCH_SIZE: 16 # batch size
+DATASET:
+    DATA_DIR: "./dataset/VOCtrainval_11-May-2012/VOC2012/" # dataset root directory
+    IMAGE_TYPE: "rgb" # choose rgb or rgba
+    NUM_CLASSES: 21 # number of classes (including the background class)
+ TEST_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/val.list"
+ TRAIN_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/train.list"
+ VAL_FILE_LIST: "dataset/VOCtrainval_11-May-2012/VOC2012/ImageSets/Segmentation/val.list"
+ IGNORE_INDEX: 255
+ SEPARATOR: " "
+MODEL:
+ MODEL_NAME: "deeplabv3p"
+    DEFAULT_NORM_TYPE: "bn" # normalization type; choose bn (batch norm) or gn (group norm)
+ DEEPLAB:
+ BACKBONE: "mobilenetv2"
+ DEPTH_MULTIPLIER: 1.0
+ ENCODER_WITH_ASPP: False
+ ENABLE_DECODER: False
+TRAIN:
+ PRETRAINED_MODEL_DIR: "./pretrained_model/deeplabv3p_mobilenetv2-1-0_bn_coco/"
+    MODEL_SAVE_DIR: "./saved_model/lovasz-softmax-voc" # directory for saving checkpoints
+ SNAPSHOT_EPOCH: 10
+TEST:
+    TEST_MODEL: "./saved_model/lovasz-softmax-voc/final" # path of the model used for evaluation
+SOLVER:
+    NUM_EPOCHS: 100 # number of training epochs (positive integer)
+    LR: 0.0001 # initial learning rate
+    LR_POLICY: "poly" # learning rate decay policy: poly, piecewise or cosine
+    OPTIMIZER: "sgd" # optimizer: sgd or adam
+ LOSS: ["lovasz_softmax_loss","softmax_loss"]
+ LOSS_WEIGHT:
+ LOVASZ_SOFTMAX_LOSS: 0.2
+ SOFTMAX_LOSS: 0.8
diff --git a/contrib/LaneNet/README.md b/contrib/LaneNet/README.md
index b86777305c160edae7a55349d719c9df2a2da4f9..1448951e900dbb8bb235be476698eb13d62f5e4c 100644
--- a/contrib/LaneNet/README.md
+++ b/contrib/LaneNet/README.md
@@ -108,7 +108,7 @@ SOLVER:
Start training with the following command:
```shell
-CUDA_VISIBLE_DEVICES=0 python -u train.py --cfg configs/lanenet.yaml --use_gpu --use_mpio --do_eval
+CUDA_VISIBLE_DEVICES=0 python -u train.py --cfg configs/lanenet.yaml --use_gpu --do_eval
```
## 6. Evaluation
diff --git a/contrib/LaneNet/eval.py b/contrib/LaneNet/eval.py
index 9256c4f024e7d15c9c018c4fe5930e5b7865c7e0..025fb4e7284adefdafa4365ace347a68e2addf48 100644
--- a/contrib/LaneNet/eval.py
+++ b/contrib/LaneNet/eval.py
@@ -101,10 +101,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
for b in data_gen:
yield b
- py_reader, pred, grts, masks, accuracy, fp, fn = build_model(
+ data_loader, pred, grts, masks, accuracy, fp, fn = build_model(
test_prog, startup_prog, phase=ModelPhase.EVAL)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
# Get device environment
@@ -127,7 +127,9 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
# Use streaming confusion matrix to calculate mean_iou
np.set_printoptions(
precision=4, suppress=True, linewidth=160, floatmode="fixed")
- fetch_list = [pred.name, grts.name, masks.name, accuracy.name, fp.name, fn.name]
+ fetch_list = [
+ pred.name, grts.name, masks.name, accuracy.name, fp.name, fn.name
+ ]
num_images = 0
step = 0
avg_acc = 0.0
@@ -137,7 +139,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
timer = Timer()
timer.start()
- py_reader.start()
+ data_loader.start()
while True:
try:
step += 1
@@ -153,7 +155,8 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
print(
"[EVAL]step={} accuracy={:.4f} fp={:.4f} fn={:.4f} step/sec={:.2f} | ETA {}"
- .format(step, avg_acc / num_images, avg_fp / num_images, avg_fn / num_images, speed,
+ .format(step, avg_acc / num_images, avg_fp / num_images,
+ avg_fn / num_images, speed,
calculate_eta(all_step - step, speed)))
timer.restart()
@@ -162,7 +165,8 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
break
print("[EVAL]#image={} accuracy={:.4f} fp={:.4f} fn={:.4f}".format(
- num_images, avg_acc / num_images, avg_fp / num_images, avg_fn / num_images))
+ num_images, avg_acc / num_images, avg_fp / num_images,
+ avg_fn / num_images))
return avg_acc / num_images, avg_fp / num_images, avg_fn / num_images
diff --git a/contrib/LaneNet/models/model_builder.py b/contrib/LaneNet/models/model_builder.py
index ed6c275ecd51a2fc9f7f2fdf125300ce026c0a0a..b274e95a7b761f0345923c7702f91420a2469404 100644
--- a/contrib/LaneNet/models/model_builder.py
+++ b/contrib/LaneNet/models/model_builder.py
@@ -25,6 +25,7 @@ from pdseg.loss import multi_softmax_with_loss
from loss import discriminative_loss
from models.modeling import lanenet
+
class ModelPhase(object):
"""
Standard name for model phase in PaddleSeg
@@ -107,35 +108,31 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
width = cfg.EVAL_CROP_SIZE[0]
height = cfg.EVAL_CROP_SIZE[1]
- image_shape = [cfg.DATASET.DATA_DIM, height, width]
- grt_shape = [1, height, width]
+ image_shape = [-1, cfg.DATASET.DATA_DIM, height, width]
+ grt_shape = [-1, 1, height, width]
class_num = cfg.DATASET.NUM_CLASSES
with fluid.program_guard(main_prog, start_prog):
with fluid.unique_name.guard():
- image = fluid.layers.data(
- name='image', shape=image_shape, dtype='float32')
- label = fluid.layers.data(
- name='label', shape=grt_shape, dtype='int32')
+ image = fluid.data(name='image', shape=image_shape, dtype='float32')
+ label = fluid.data(name='label', shape=grt_shape, dtype='int32')
if cfg.MODEL.MODEL_NAME == 'lanenet':
- label_instance = fluid.layers.data(
+ label_instance = fluid.data(
name='label_instance', shape=grt_shape, dtype='int32')
- mask = fluid.layers.data(
- name='mask', shape=grt_shape, dtype='int32')
+ mask = fluid.data(name='mask', shape=grt_shape, dtype='int32')
- # use PyReader when doing traning and evaluation
+        # use DataLoader.from_generator when doing training and evaluation
if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase):
- py_reader = fluid.io.PyReader(
+ data_loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label, label_instance, mask],
capacity=cfg.DATALOADER.BUF_SIZE,
iterable=False,
use_double_buffer=True)
-
loss_type = cfg.SOLVER.LOSS
if not isinstance(loss_type, list):
loss_type = list(loss_type)
-
+
logits = seg_model(image, class_num)
if ModelPhase.is_train(phase):
@@ -144,25 +141,30 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
if cfg.MODEL.MODEL_NAME == 'lanenet':
embeding_logit = logits[1]
logits = logits[0]
- disc_loss, _, _, l_reg = discriminative_loss(embeding_logit, label_instance, 4,
- image_shape[1:], 0.5, 3.0, 1.0, 1.0, 0.001)
+ disc_loss, _, _, l_reg = discriminative_loss(
+ embeding_logit, label_instance, 4, image_shape[2:], 0.5,
+ 3.0, 1.0, 1.0, 0.001)
if "softmax_loss" in loss_type:
weight = None
if cfg.MODEL.MODEL_NAME == 'lanenet':
weight = get_dynamic_weight(label)
- seg_loss = multi_softmax_with_loss(logits, label, mask, class_num, weight)
+ seg_loss = multi_softmax_with_loss(logits, label, mask,
+ class_num, weight)
loss_valid = True
valid_loss.append("softmax_loss")
if not loss_valid:
- raise Exception("SOLVER.LOSS: {} is set wrong. it should "
- "include one of (softmax_loss, bce_loss, dice_loss) at least"
- " example: ['softmax_loss']".format(cfg.SOLVER.LOSS))
+ raise Exception(
+                "SOLVER.LOSS: {} is invalid. It must include at least one of "
+                "(softmax_loss, bce_loss, dice_loss), "
+                "e.g. ['softmax_loss']".format(cfg.SOLVER.LOSS))
invalid_loss = [x for x in loss_type if x not in valid_loss]
if len(invalid_loss) > 0:
- print("Warning: the loss {} you set is invalid. it will not be included in loss computed.".format(invalid_loss))
+ print(
+                "Warning: the loss {} you set is invalid and will not be included in the computed loss."
+ .format(invalid_loss))
avg_loss = disc_loss + 0.00001 * l_reg + seg_loss
@@ -202,12 +204,12 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
accuracy, fp, fn = compute_metric(pred, label)
if ModelPhase.is_eval(phase):
- return py_reader, pred, label, mask, accuracy, fp, fn
+ return data_loader, pred, label, mask, accuracy, fp, fn
if ModelPhase.is_train(phase):
optimizer = solver.Solver(main_prog, start_prog)
decayed_lr = optimizer.optimise(avg_loss)
- return py_reader, avg_loss, decayed_lr, pred, label, mask, disc_loss, seg_loss, accuracy, fp, fn
+ return data_loader, avg_loss, decayed_lr, pred, label, mask, disc_loss, seg_loss, accuracy, fp, fn
def compute_metric(pred, label):
@@ -216,19 +218,27 @@ def compute_metric(pred, label):
idx = fluid.layers.where(pred == 1)
pix_cls_ret = fluid.layers.gather_nd(label, idx)
- correct_num = fluid.layers.reduce_sum(fluid.layers.cast(pix_cls_ret, 'float32'))
+ correct_num = fluid.layers.reduce_sum(
+ fluid.layers.cast(pix_cls_ret, 'float32'))
- gt_num = fluid.layers.cast(fluid.layers.shape(fluid.layers.gather_nd(label,
- fluid.layers.where(label == 1)))[0], 'int64')
- pred_num = fluid.layers.cast(fluid.layers.shape(fluid.layers.gather_nd(pred, idx))[0], 'int64')
+ gt_num = fluid.layers.cast(
+ fluid.layers.shape(
+ fluid.layers.gather_nd(label, fluid.layers.where(label == 1)))[0],
+ 'int64')
+ pred_num = fluid.layers.cast(
+ fluid.layers.shape(fluid.layers.gather_nd(pred, idx))[0], 'int64')
accuracy = correct_num / gt_num
false_pred = pred_num - correct_num
- fp = fluid.layers.cast(false_pred, 'float32') / fluid.layers.cast(fluid.layers.shape(pix_cls_ret)[0], 'int64')
-
- label_cls_ret = fluid.layers.gather_nd(label, fluid.layers.where(label == 1))
- mis_pred = fluid.layers.cast(fluid.layers.shape(label_cls_ret)[0], 'int64') - correct_num
- fn = fluid.layers.cast(mis_pred, 'float32') / fluid.layers.cast(fluid.layers.shape(label_cls_ret)[0], 'int64')
+ fp = fluid.layers.cast(false_pred, 'float32') / fluid.layers.cast(
+ fluid.layers.shape(pix_cls_ret)[0], 'int64')
+
+ label_cls_ret = fluid.layers.gather_nd(label,
+ fluid.layers.where(label == 1))
+ mis_pred = fluid.layers.cast(fluid.layers.shape(label_cls_ret)[0],
+ 'int64') - correct_num
+ fn = fluid.layers.cast(mis_pred, 'float32') / fluid.layers.cast(
+ fluid.layers.shape(label_cls_ret)[0], 'int64')
accuracy.stop_gradient = True
fp.stop_gradient = True
fn.stop_gradient = True
@@ -239,7 +249,8 @@ def get_dynamic_weight(label):
label = fluid.layers.reshape(label, [-1])
unique_labels, unique_id, counts = fluid.layers.unique_with_counts(label)
counts = fluid.layers.cast(counts, 'float32')
- weight = 1.0 / fluid.layers.log((counts / fluid.layers.reduce_sum(counts) + 1.02))
+ weight = 1.0 / fluid.layers.log(
+ (counts / fluid.layers.reduce_sum(counts) + 1.02))
return weight
diff --git a/contrib/LaneNet/train.py b/contrib/LaneNet/train.py
index 3ee9489c9b18b19b6b84615a400815a3bc33ccb2..c2f5bee7547eabe9ef5c998b197fbaf59130d679 100644
--- a/contrib/LaneNet/train.py
+++ b/contrib/LaneNet/train.py
@@ -232,9 +232,9 @@ def train(cfg):
cfg.BATCH_SIZE_PER_DEV = batch_size_per_dev
print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
- py_reader, avg_loss, lr, pred, grts, masks, emb_loss, seg_loss, accuracy, fp, fn = build_model(
+ data_loader, avg_loss, lr, pred, grts, masks, emb_loss, seg_loss, accuracy, fp, fn = build_model(
train_prog, startup_prog, phase=ModelPhase.TRAIN)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
exe = fluid.Executor(place)
@@ -315,7 +315,10 @@ def train(cfg):
format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
# fetch_list = [avg_loss.name, lr.name, accuracy.name, precision.name, recall.name]
- fetch_list = [avg_loss.name, lr.name, seg_loss.name, emb_loss.name, accuracy.name, fp.name, fn.name]
+ fetch_list = [
+ avg_loss.name, lr.name, seg_loss.name, emb_loss.name, accuracy.name,
+ fp.name, fn.name
+ ]
if args.debug:
# Fetch more variable info and use streaming confusion matrix to
# calculate IoU results if in debug mode
@@ -359,7 +362,7 @@ def train(cfg):
print_info("Use multi-thread reader")
for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
- py_reader.start()
+ data_loader.start()
while True:
try:
# If not in debug mode, avoid unnessary log and calculate
@@ -385,16 +388,15 @@ def train(cfg):
avg_fn /= args.log_steps
speed = args.log_steps / timer.elapsed_time()
print((
- "epoch={} step={} lr={:.5f} loss={:.4f} seg_loss={:.4f} emb_loss={:.4f} accuracy={:.4} fp={:.4} fn={:.4} step/sec={:.3f} | ETA {}"
- ).format(epoch, global_step, lr[0], avg_loss, avg_seg_loss, avg_emb_loss, avg_acc, avg_fp, avg_fn, speed,
- calculate_eta(all_step - global_step, speed)))
+ "epoch={} step={} lr={:.5f} loss={:.4f} seg_loss={:.4f} emb_loss={:.4f} accuracy={:.4} fp={:.4} fn={:.4} step/sec={:.3f} | ETA {}"
+ ).format(epoch, global_step, lr[0], avg_loss, avg_seg_loss,
+ avg_emb_loss, avg_acc, avg_fp, avg_fn, speed,
+ calculate_eta(all_step - global_step, speed)))
if args.use_tb:
log_writer.add_scalar('Train/loss', avg_loss,
global_step)
- log_writer.add_scalar('Train/lr', lr[0],
- global_step)
- log_writer.add_scalar('Train/speed', speed,
- global_step)
+ log_writer.add_scalar('Train/lr', lr[0], global_step)
+ log_writer.add_scalar('Train/speed', speed, global_step)
sys.stdout.flush()
avg_loss = 0.0
avg_seg_loss = 0.0
@@ -405,7 +407,7 @@ def train(cfg):
timer.restart()
except fluid.core.EOFException:
- py_reader.reset()
+ data_loader.reset()
break
except Exception as e:
print(e)
@@ -423,10 +425,8 @@ def train(cfg):
if args.use_tb:
log_writer.add_scalar('Evaluate/accuracy', accuracy,
global_step)
- log_writer.add_scalar('Evaluate/fp', fp,
- global_step)
- log_writer.add_scalar('Evaluate/fn', fn,
- global_step)
+ log_writer.add_scalar('Evaluate/fp', fp, global_step)
+ log_writer.add_scalar('Evaluate/fn', fn, global_step)
# Use Tensorboard to visualize results
if args.use_tb and cfg.DATASET.VIS_FILE_LIST is not None:
diff --git a/contrib/RealTimeHumanSeg/README.md b/contrib/RealTimeHumanSeg/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..e8693e11e4d66b9a2ee04bf1e03a5704a95fb426
--- /dev/null
+++ b/contrib/RealTimeHumanSeg/README.md
@@ -0,0 +1,28 @@
+# Real-Time Human Segmentation Deployment
+
+This solution is based on the human segmentation model open-sourced by PaddlePaddle, with extensive optical-flow tracking optimizations for video. It provides a complete real-time human segmentation pipeline for video streams, along with high-performance `Python` and `C++` deployment options to meet the needs of different scenarios.
+
+
+## Model Download
+
+The supported model files are listed below; choose the one that fits your scenario:
+
+|Model file | Description |
+|---|---|
+|[shv75_deeplab_0303_quant](https://paddleseg.bj.bcebos.com/deploy/models/shv75_0303_quant.zip) | Small quantized model, suitable for lightweight compute environments |
+|[shv75_deeplab_0303](https://paddleseg.bj.bcebos.com/deploy/models/shv75_deeplab_0303.zip)| Small model, suitable for lightweight compute environments |
+|[deeplabv3_xception_humanseg](https://paddleseg.bj.bcebos.com/deploy/models/deeplabv3_xception_humanseg.zip) | Server-side GPU environments |
+
+**Note: after downloading, unzip the archive to a suitable path; that path is later passed as an inference argument to load the model.**
+
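+For example, with one of the URLs from the table above (any of them works the same way):
+
+```shell
+wget https://paddleseg.bj.bcebos.com/deploy/models/deeplabv3_xception_humanseg.zip
+unzip deeplabv3_xception_humanseg.zip
+# pass the unzipped directory as the model path argument (e.g. --model_dir for the Python deployment)
+```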
+
+## Inference Deployment
+- [Python deployment](./python)
+- [C++ deployment](./cpp)
+
+## Preview
+
+
+
+
+
+
diff --git a/contrib/RealTimeHumanSeg/python/README.md b/contrib/RealTimeHumanSeg/python/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..1e089c9f5226e2482cd6e8957406c00095706b1b
--- /dev/null
+++ b/contrib/RealTimeHumanSeg/python/README.md
@@ -0,0 +1,61 @@
+# Real-Time Human Segmentation: Python Deployment
+
+This solution is implemented in Python with minimal dependencies. Model loading, data preprocessing, prediction, and post-processing such as optical-flow handling are all encapsulated in `infer.py`, so you can use it directly or integrate it into your own project.
+
+
+## Prerequisites
+- Windows (7, 8, 10) / Linux (Ubuntu 16.04) / MacOS 10.1+
+- Paddle 1.6.1+
+- Python 3.0+
+
+Notes:
+1. Only Paddle 1.6 and 1.7 have been tested; other versions are not supported.
+2. GPU inference is not supported on MacOS.
+3. Not tested on Python 2.
+
+For other cases not listed above, any environment in which `Paddle` and `OpenCV` install correctly should generally work.
+
+
+## Installing Dependencies
+### 1. Install Paddle
+
+To install PaddlePaddle, follow the [official guide](https://paddlepaddle.org.cn/install/quick) and choose the version that suits your environment.
+
+### 2. Install other dependencies
+
+Run the following command:
+
+```shell
+pip install -r requirements.txt
+```
+
+## Usage
+
+
+1. Segment an input image
+```shell
+python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --img_path /PATH/TO/INPUT/IMAGE
+```
+
+The result is saved as `result.jpeg`.
+2. Segment an input video
+```shell
+python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --video_path /PATH/TO/INPUT/VIDEO
+```
+
+The result is saved as `result.avi`.
+
+3. Use the camera video stream
+```shell
+python infer.py --model_dir /PATH/TO/INFERENCE/MODEL --use_camera 1
+```
+Results are displayed in real time in a visualization window.
+
+**Note:**
+
+
+`GPU` is disabled by default. To enable `GPU` acceleration, first run
+```shell
+export CUDA_VISIBLE_DEVICES=0
+```
+and then add `--use_gpu 1` to the prediction commands above.
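+
+## Using `infer.py` as a module
+
+Besides the command line, the `HumanSeg` class defined in `infer.py` can be used directly from Python. A minimal sketch (the mean/scale/eval_size values simply mirror the defaults hard-coded in `infer.py`'s `main`; the paths are placeholders):
+
+```python
+import cv2
+from infer import HumanSeg
+
+seg = HumanSeg(model_dir='/PATH/TO/INFERENCE/MODEL',
+               mean=[104.008, 116.669, 122.675],
+               scale=[1.0, 1.0, 1.0],
+               eval_size=(192, 192),
+               use_gpu=False)
+frame = cv2.imread('input.jpg')   # any BGR image loaded with OpenCV
+result = seg.run_predict(frame)   # blended visualization image
+cv2.imwrite('result.jpeg', result)
+```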
diff --git a/contrib/RealTimeHumanSeg/python/infer.py b/contrib/RealTimeHumanSeg/python/infer.py
new file mode 100644
index 0000000000000000000000000000000000000000..73df081e4cbda06e20b471b2eae60a2ba037e49a
--- /dev/null
+++ b/contrib/RealTimeHumanSeg/python/infer.py
@@ -0,0 +1,345 @@
+# coding: utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""实时人像分割Python预测部署"""
+
+import os
+import argparse
+import numpy as np
+import cv2
+
+import paddle.fluid as fluid
+
+
+def human_seg_tracking(pre_gray, cur_gray, prev_cfd, dl_weights, disflow):
+    """Compute optical-flow-tracked matches and the tracked confidence map.
+    Args:
+        pre_gray: grayscale image of the previous frame
+        cur_gray: grayscale image of the current frame
+        prev_cfd: confidence map of the previous frame
+        dl_weights: fusion weight map
+        disflow: DIS optical flow object
+    Returns:
+        track_cfd: tracked confidence map
+        is_track: binary map marking pixels with a valid flow match
+        dl_weights: updated fusion weight map
+    """
+ check_thres = 8
+ hgt, wdh = pre_gray.shape[:2]
+ track_cfd = np.zeros_like(prev_cfd)
+ is_track = np.zeros_like(pre_gray)
+    # forward optical flow
+ flow_fw = disflow.calc(pre_gray, cur_gray, None)
+    # backward optical flow
+ flow_bw = disflow.calc(cur_gray, pre_gray, None)
+    get_round = lambda data: int(data + 0.5) if data >= 0 else int(data - 0.5)
+ for row in range(hgt):
+ for col in range(wdh):
+            # coordinates of the corresponding point after forward flow
+            # (row, col) -> (cur_x, cur_y)
+ fxy_fw = flow_fw[row, col]
+ dx_fw = get_round(fxy_fw[0])
+ cur_x = dx_fw + col
+ dy_fw = get_round(fxy_fw[1])
+ cur_y = dy_fw + row
+ if cur_x < 0 or cur_x >= wdh or cur_y < 0 or cur_y >= hgt:
+ continue
+ fxy_bw = flow_bw[cur_y, cur_x]
+ dx_bw = get_round(fxy_bw[0])
+ dy_bw = get_round(fxy_bw[1])
+            # forward-backward consistency check: skip points whose round-trip error exceeds the threshold
+ lmt = ((dy_fw + dy_bw) * (dy_fw + dy_bw) + (dx_fw + dx_bw) * (dx_fw + dx_bw))
+ if lmt >= check_thres:
+ continue
+            # down-weight static points
+ if abs(dy_fw) <= 0 and abs(dx_fw) <= 0 and abs(dy_bw) <= 0 and abs(dx_bw) <= 0:
+ dl_weights[cur_y, cur_x] = 0.05
+ is_track[cur_y, cur_x] = 1
+ track_cfd[cur_y, cur_x] = prev_cfd[row, col]
+ return track_cfd, is_track, dl_weights
+
+
+def human_seg_track_fuse(track_cfd, dl_cfd, dl_weights, is_track):
+    """Fuse the optical-flow-tracked confidence map with the segmentation result.
+    Args:
+        track_cfd: tracked confidence map
+        dl_cfd: segmentation confidence map of the current frame
+        dl_weights: fusion weight map
+        is_track: binary map of flow-matched pixels
+    Returns:
+        cur_cfd: fused confidence map
+    """
+ cur_cfd = dl_cfd.copy()
+ idxs = np.where(is_track > 0)
+    # idxs is a tuple of (row indices, col indices); iterate over matched pixels
+    for i in range(len(idxs[0])):
+        x, y = idxs[0][i], idxs[1][i]
+        dl_score = dl_cfd[x, y]
+        track_score = track_cfd[x, y]
+        if dl_score > 0.9 or dl_score < 0.1:
+            if dl_weights[x, y] < 0.1:
+                cur_cfd[x, y] = 0.3 * dl_score + 0.7 * track_score
+            else:
+                cur_cfd[x, y] = 0.4 * dl_score + 0.6 * track_score
+        else:
+            cur_cfd[x, y] = dl_weights[x, y] * dl_score + (1 - dl_weights[x, y]) * track_score
+ return cur_cfd
+
+
+def threshold_mask(img, thresh_bg, thresh_fg):
+    """Apply background and foreground thresholds to build a soft mask.
+    Args:
+        img: input image, np.uint8
+        thresh_bg: background threshold ratio; values below it are set to 0
+        thresh_fg: foreground threshold ratio; values above it are set to 1
+    Returns:
+        dst: thresholded mask, np.float32
+    """
+ dst = (img / 255.0 - thresh_bg) / (thresh_fg - thresh_bg)
+ dst[np.where(dst > 1)] = 1
+ dst[np.where(dst < 0)] = 0
+ return dst.astype(np.float32)
+
+
+def optflow_handle(cur_gray, scoremap, is_init):
+    """Refine the segmentation score map with optical flow.
+    Args:
+        cur_gray: grayscale image of the current frame
+        scoremap: segmentation result of the current frame
+        is_init: whether this is the first frame
+    Returns:
+        fusion_cfd: fusion of flow tracking and the prediction, np.float32
+    """
+    height, width = scoremap.shape[0], scoremap.shape[1]  # np arrays are (rows, cols)
+ disflow = cv2.DISOpticalFlow_create(
+ cv2.DISOPTICAL_FLOW_PRESET_ULTRAFAST)
+ prev_gray = np.zeros((height, width), np.uint8)
+ prev_cfd = np.zeros((height, width), np.float32)
+ cur_cfd = scoremap.copy()
+ if is_init:
+ is_init = False
+ if height <= 64 or width <= 64:
+ disflow.setFinestScale(1)
+ elif height <= 160 or width <= 160:
+ disflow.setFinestScale(2)
+ else:
+ disflow.setFinestScale(3)
+ fusion_cfd = cur_cfd
+ else:
+        weights = np.ones((height, width), np.float32) * 0.3
+ track_cfd, is_track, weights = human_seg_tracking(
+ prev_gray, cur_gray, prev_cfd, weights, disflow)
+ fusion_cfd = human_seg_track_fuse(track_cfd, cur_cfd, weights, is_track)
+ fusion_cfd = cv2.GaussianBlur(fusion_cfd, (3, 3), 0)
+ return fusion_cfd
+
+
+class HumanSeg:
+    """Human segmentation wrapper.
+    Encapsulates model loading, preprocessing, prediction and post-processing.
+    """
+ def __init__(self, model_dir, mean, scale, eval_size, use_gpu=False):
+
+ self.mean = np.array(mean).reshape((3, 1, 1))
+ self.scale = np.array(scale).reshape((3, 1, 1))
+ self.eval_size = eval_size
+ self.load_model(model_dir, use_gpu)
+
+ def load_model(self, model_dir, use_gpu):
+        """Load the model and create the predictor.
+        Args:
+            model_dir: inference model directory containing `__model__` and `__params__`
+            use_gpu: whether to enable GPU acceleration
+        """
+ prog_file = os.path.join(model_dir, '__model__')
+ params_file = os.path.join(model_dir, '__params__')
+ config = fluid.core.AnalysisConfig(prog_file, params_file)
+ if use_gpu:
+ config.enable_use_gpu(100, 0)
+ config.switch_ir_optim(True)
+ else:
+ config.disable_gpu()
+ config.disable_glog_info()
+ config.switch_specify_input_names(True)
+ config.enable_memory_optim()
+ self.predictor = fluid.core.create_paddle_predictor(config)
+
+ def preprocess(self, image):
+        """Preprocess an image.
+        Resizes to eval_size, transposes HWC to CHW and normalizes with mean/scale.
+        Args:
+            image: original image
+        Returns:
+            the preprocessed image tensor
+        """
+ img_mat = cv2.resize(
+ image, self.eval_size, interpolation=cv2.INTER_LINEAR)
+ # HWC -> CHW
+ img_mat = img_mat.swapaxes(1, 2)
+ img_mat = img_mat.swapaxes(0, 1)
+ # Convert to float
+ img_mat = img_mat[:, :, :].astype('float32')
+ # img_mat = (img_mat - mean) * scale
+ img_mat = img_mat - self.mean
+ img_mat = img_mat * self.scale
+ img_mat = img_mat[np.newaxis, :, :, :]
+ return img_mat
+
+ def postprocess(self, image, output_data):
+        """Post-process the prediction.
+        Args:
+            image: original image (OpenCV image object)
+            output_data: raw Paddle prediction output
+        Returns:
+            the original image blended with the optical-flow-refined mask
+        """
+ scoremap = output_data[0, 1, :, :]
+ scoremap = (scoremap * 255).astype(np.uint8)
+ ori_h, ori_w = image.shape[0], image.shape[1]
+ evl_h, evl_w = self.eval_size[0], self.eval_size[1]
+        # optical-flow refinement
+ cur_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+ cur_gray = cv2.resize(cur_gray, (evl_w, evl_h))
+ optflow_map = optflow_handle(cur_gray, scoremap, False)
+ optflow_map = cv2.GaussianBlur(optflow_map, (3, 3), 0)
+ optflow_map = threshold_mask(optflow_map, thresh_bg=0.2, thresh_fg=0.8)
+ optflow_map = cv2.resize(optflow_map, (ori_w, ori_h))
+ optflow_map = np.repeat(optflow_map[:, :, np.newaxis], 3, axis=2)
+ bg_im = np.ones_like(optflow_map) * 255
+ comb = (optflow_map * image + (1 - optflow_map) * bg_im).astype(np.uint8)
+ return comb
+
+ def run_predict(self, image):
+        """Run inference and return the visualized result.
+        Args:
+            image: input image to predict (OpenCV image object)
+        Returns:
+            the visualized prediction result
+        """
+ im_mat = self.preprocess(image)
+ im_tensor = fluid.core.PaddleTensor(im_mat.copy().astype('float32'))
+ output_data = self.predictor.run([im_tensor])[0]
+ output_data = output_data.as_ndarray()
+ return self.postprocess(image, output_data)
+
+
+def predict_image(seg, image_path):
+    """Segment a single image file.
+    The result is saved to `result.jpeg`.
+    """
+ img_mat = cv2.imread(image_path)
+ img_mat = seg.run_predict(img_mat)
+ cv2.imwrite('result.jpeg', img_mat)
+
+
+def predict_video(seg, video_path):
+    """Segment a video file.
+    The result is saved to `result.avi`.
+    """
+ cap = cv2.VideoCapture(video_path)
+ if not cap.isOpened():
+ print("Error opening video stream or file")
+ return
+ width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+ height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+ fps = cap.get(cv2.CAP_PROP_FPS)
+    # writer used to save the prediction result video
+ out = cv2.VideoWriter('result.avi',
+ cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), fps,
+ (width, height))
+    # start reading video frames
+ while cap.isOpened():
+ ret, frame = cap.read()
+ if ret:
+ img_mat = seg.run_predict(frame)
+ out.write(img_mat)
+ else:
+ break
+ cap.release()
+ out.release()
+
+
+def predict_camera(seg):
+    """Run prediction on the camera video stream.
+    Segmentation results are shown in a visualization window in real time.
+    """
+ cap = cv2.VideoCapture(0)
+ if not cap.isOpened():
+ print("Error opening video stream or file")
+ return
+ # Start capturing from video
+ while cap.isOpened():
+ ret, frame = cap.read()
+ if ret:
+ img_mat = seg.run_predict(frame)
+ cv2.imshow('HumanSegmentation', img_mat)
+ if cv2.waitKey(1) & 0xFF == ord('q'):
+ break
+ else:
+ break
+ cap.release()
+
+
+def main(args):
+    """Program entry point.
+    Loads the model and runs prediction on video, camera or image input.
+    """
+ model_dir = args.model_dir
+ use_gpu = args.use_gpu
+
+    # load the model
+ mean = [104.008, 116.669, 122.675]
+ scale = [1.0, 1.0, 1.0]
+ eval_size = (192, 192)
+ seg = HumanSeg(model_dir, mean, scale, eval_size, use_gpu)
+ if args.use_camera:
+        # use the camera
+ predict_camera(seg)
+ elif args.video_path:
+        # use a video file as input
+ predict_video(seg, args.video_path)
+ elif args.img_path:
+        # use an image file as input
+ predict_image(seg, args.img_path)
+
+
+def parse_args():
+    """Parse command-line arguments."""
+ parser = argparse.ArgumentParser('Realtime Human Segmentation')
+ parser.add_argument('--model_dir',
+ type=str,
+ default='',
+ help='path of human segmentation model')
+ parser.add_argument('--img_path',
+ type=str,
+ default='',
+ help='path of input image')
+ parser.add_argument('--video_path',
+ type=str,
+ default='',
+ help='path of input video')
+    # NOTE: int flags (0/1) are used instead of type=bool, because argparse's
+    # bool('0') evaluates to True and would silently enable the option
+    parser.add_argument('--use_camera',
+                        type=int,
+                        default=0,
+                        help='whether to read the video stream from camera (0/1)')
+    parser.add_argument('--use_gpu',
+                        type=int,
+                        default=0,
+                        help='whether to enable gpu (0/1)')
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ main(args)
diff --git a/contrib/RealTimeHumanSeg/python/requirements.txt b/contrib/RealTimeHumanSeg/python/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..953dae0cf5e2036ad093907b30ac9a3a10858d27
--- /dev/null
+++ b/contrib/RealTimeHumanSeg/python/requirements.txt
@@ -0,0 +1,2 @@
+opencv-python==4.1.2.30
+opencv-contrib-python==4.2.0.32
diff --git a/docs/imgs/fast-scnn.png b/docs/imgs/fast-scnn.png
new file mode 100644
index 0000000000000000000000000000000000000000..2b00eb88401b9981a6d59595bbdf4e1f692db934
Binary files /dev/null and b/docs/imgs/fast-scnn.png differ
diff --git a/docs/imgs/lovasz-hinge.png b/docs/imgs/lovasz-hinge.png
new file mode 100644
index 0000000000000000000000000000000000000000..579b3b7b8fd27acbbde073564e53e2cbdfb3df67
Binary files /dev/null and b/docs/imgs/lovasz-hinge.png differ
diff --git a/docs/imgs/lovasz-softmax.png b/docs/imgs/lovasz-softmax.png
new file mode 100644
index 0000000000000000000000000000000000000000..afb3f2e8e29c1c7925228aa8336b6bb23e1e93ba
Binary files /dev/null and b/docs/imgs/lovasz-softmax.png differ
diff --git a/docs/lovasz_loss.md b/docs/lovasz_loss.md
new file mode 100644
index 0000000000000000000000000000000000000000..1270b3fd0fc677350f8c51a2e7a8ac704af68de5
--- /dev/null
+++ b/docs/lovasz_loss.md
@@ -0,0 +1,116 @@
+# Lovasz loss
+In image segmentation tasks the class distribution is often highly imbalanced, e.g. defect detection on industrial products, road extraction, and lesion segmentation.
+
+Lovasz loss can help in such cases. Depending on the number of target classes, it comes in two variants: lovasz hinge loss for binary segmentation, and lovasz softmax loss for multi-class segmentation.
+
+
+## Lovasz hinge loss
+### Usage
+
+PaddleSeg selects the training loss through the `cfg.SOLVER.LOSS` parameter;
+for example, `cfg.SOLVER.LOSS=['lovasz_hinge_loss','bce_loss']` trains with a combination of `lovasz hinge loss` and `bce loss`.
+
+Lovasz hinge loss can be used in three ways: (1) on its own; (2) combined with bce loss; (3) train with bce loss first, then finetune with lovasz hinge loss. The first way does not always give good results, so the latter two are recommended. This document uses the second way, configured as shown below.
+
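+The loss combination and its weights are configured under `SOLVER`; the excerpt below is taken from `configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml` in this repository (the 0.5/0.5 weights are simply what that config uses, not a required setting):
+
+```yaml
+SOLVER:
+    LOSS: ["lovasz_hinge_loss","bce_loss"]
+    LOSS_WEIGHT:
+        LOVASZ_HINGE_LOSS: 0.5
+        BCE_LOSS: 0.5
+```
+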
+### Example
+
+We use a road extraction task as the example for lovasz hinge loss.
+In the DeepGlobe Road Extraction challenge, roads account for only 4.5% of the training pixels. A sample image:
+
+
+
+As shown, roads occupy a very small fraction of each image.
+
+#### Experimental comparison
+
+We ran a comparison on the MiniDeepGlobeRoadExtraction dataset.
+
+* Download the dataset
+We randomly sampled 800 images for training and 200 images for validation from the DeepGlobe Road Extraction training set,
+building a small road extraction dataset, [MiniDeepGlobeRoadExtraction](https://paddleseg.bj.bcebos.com/dataset/MiniDeepGlobeRoadExtraction.zip).
+
+```shell
+python dataset/download_mini_deepglobe_road_extraction.py
+```
+
+* Download the pretrained model
+```shell
+python pretrained_model/download_model.py deeplabv3p_mobilenetv2-1-0_bn_coco
+```
+* Check the config and data
+```shell
+python pdseg/check.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml
+```
+
+* Train
+```shell
+python pdseg/train.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml --use_gpu --use_mpio SOLVER.LOSS "['lovasz_hinge_loss','bce_loss']"
+```
+
+* Evaluate
+```shell
+python pdseg/eval.py --cfg ./configs/lovasz_hinge_deeplabv3p_mobilenet_road.yaml --use_gpu --use_mpio SOLVER.LOSS "['lovasz_hinge_loss','bce_loss']"
+```
+
+* Results
+
+The comparison between lovasz hinge loss + bce loss and softmax loss is shown below.
+
+
+
+
+The blue curve is lovasz hinge loss + bce loss, with a best mIoU of 76.2%; the orange curve is softmax loss, with a best mIoU of 73.44%, an improvement of 2.76 percentage points.
+
+
+
+## Lovasz softmax loss
+### Usage
+
+PaddleSeg selects the training loss through the `cfg.SOLVER.LOSS` parameter;
+for example, `cfg.SOLVER.LOSS=['lovasz_softmax_loss','softmax_loss']` trains with a combination of `lovasz softmax loss` and `softmax loss`.
+
+Lovasz softmax loss can be used in three ways: (1) on its own; (2) combined with softmax loss; (3) train with softmax loss first, then finetune with lovasz softmax loss. The first way does not always give good results, so the latter two are recommended. This document uses the second way, configured as shown below.
+
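+The corresponding `SOLVER` excerpt from `configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml` (again, the 0.2/0.8 weights are just that config's choice):
+
+```yaml
+SOLVER:
+    LOSS: ["lovasz_softmax_loss","softmax_loss"]
+    LOSS_WEIGHT:
+        LOVASZ_SOFTMAX_LOSS: 0.2
+        SOFTMAX_LOSS: 0.8
+```
+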
+### Example
+
+We use Pascal VOC as the example for lovasz softmax loss.
+
+
+#### Experimental comparison
+
+We compared against softmax loss on the Pascal VOC dataset.
+
+* Download the dataset
+```shell
+python dataset/download_and_convert_voc2012.py
+```
+
+* Download the pretrained model
+```shell
+python pretrained_model/download_model.py deeplabv3p_mobilenetv2-1-0_bn_coco
+```
+* Check the config and data
+```shell
+python pdseg/check.py --cfg ./configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml
+```
+
+* Train
+```shell
+python pdseg/train.py --cfg ./configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml --use_gpu --use_mpio SOLVER.LOSS "['lovasz_softmax_loss','softmax_loss']"
+
+```
+
+* Evaluate
+```shell
+python pdseg/eval.py --cfg ./configs/lovasz_softmax_deeplabv3p_mobilenet_pascal.yaml --use_gpu --use_mpio SOLVER.LOSS "['lovasz_softmax_loss','softmax_loss']"
+
+```
+
+* Results
+
+The comparison between lovasz softmax loss + softmax loss and plain softmax loss is shown below.
+
+
+
+
+The orange curve is lovasz softmax loss + softmax loss, with a best mIoU of 64.63%; the blue curve is softmax loss, with a best mIoU of 63.55%, an improvement of 1.08 percentage points.
diff --git a/docs/models.md b/docs/models.md
index a452aa3639c3901d8f75d1aa4f5f1b7f393ce0b7..c36fff5fc88aef362f3ab4f7175b7d60f579e418 100644
--- a/docs/models.md
+++ b/docs/models.md
@@ -5,6 +5,7 @@
- [PSPNet](#PSPNet)
- [ICNet](#ICNet)
- [HRNet](#HRNet)
+- [Fast-SCNN](#Fast-SCNN)
## U-Net
U-Net [1] originated in medical image segmentation. It is a standard encoder-decoder network characterized by few parameters, fast computation, strong applicability, and good adaptability to general scenes. First proposed in 2015, U-Net won first place in the ISBI 2015 Cell Tracking Challenge and has since developed many variants and applications.
@@ -58,6 +59,14 @@ HRNet has achieved remarkable results in human pose estimation, semantic segmentation and object detection

+## Fast-SCNN
+
+Fast-SCNN [7] is a real-time-oriented semantic segmentation network. On top of a two-branch structure, it makes heavy use of depthwise separable convolutions and inverted-residual modules, and builds a Pyramid Pooling Module with feature fusion to aggregate context information. This lets Fast-SCNN learn rich detail while remaining efficient.
+
+The overall architecture:
+
+
+
## References
[1] [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597)
@@ -72,3 +81,6 @@ HRNet has achieved remarkable results in human pose estimation, semantic segmentation and object detection
[6] [Deep High-Resolution Representation Learning for Visual Recognition](https://arxiv.org/abs/1908.07919)
+[7] [Fast-SCNN: Fast Semantic Segmentation Network](https://arxiv.org/abs/1902.04502)
+
+
diff --git a/pdseg/eval.py b/pdseg/eval.py
index b842431fb895c0985da1de5d5ef65073534a9835..426e52f9287bbdb5e4b2e2a4bb617ee910aeff5e 100644
--- a/pdseg/eval.py
+++ b/pdseg/eval.py
@@ -92,10 +92,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
for b in data_gen:
yield b[0], b[1], b[2]
- py_reader, avg_loss, pred, grts, masks = build_model(
+ data_loader, avg_loss, pred, grts, masks = build_model(
test_prog, startup_prog, phase=ModelPhase.EVAL)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
# Get device environment
@@ -128,7 +128,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
timer = Timer()
timer.start()
- py_reader.start()
+ data_loader.start()
while True:
try:
step += 1
diff --git a/pdseg/lovasz_losses.py b/pdseg/lovasz_losses.py
new file mode 100755
index 0000000000000000000000000000000000000000..5f2283583972945ca3d70233a684833ed8902d6c
--- /dev/null
+++ b/pdseg/lovasz_losses.py
@@ -0,0 +1,205 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Lovasz-Softmax and Jaccard hinge loss in PaddlePaddle"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import paddle.fluid as fluid
+import numpy as np
+
+
+def _cumsum(x):
+ y = np.array(x)
+ return np.cumsum(y, axis=0)
+
+
+def create_tmp_var(name, dtype, shape):
+ return fluid.default_main_program().current_block().create_var(
+ name=name, dtype=dtype, shape=shape)
+
+
+def lovasz_grad(gt_sorted):
+ """
+ Computes gradient of the Lovasz extension w.r.t sorted errors
+ See Alg. 1 in paper
+ """
+ gt_sorted = fluid.layers.squeeze(gt_sorted, axes=[1])
+ gts = fluid.layers.reduce_sum(gt_sorted)
+ len_gt = fluid.layers.shape(gt_sorted)
+
+ # Acceleration is achieved by reducing the number of calls to cumsum.
+ # This calculation method is equivalent to that of the original paper.
+ var_one = fluid.layers.fill_constant(shape=[1], value=1, dtype='int32')
+ range_ = fluid.layers.range(1, len_gt + var_one, 1, 'int32')
+ tmp_var = create_tmp_var(
+ name='tmp_var', dtype=gt_sorted.dtype, shape=gt_sorted.shape)
+ cumsum_ = fluid.layers.py_func(func=_cumsum, x=gt_sorted, out=tmp_var)
+ intersection = gts - cumsum_
+ union = intersection + range_
+
+ jaccard = 1.0 - intersection / union
+ jaccard0 = fluid.layers.slice(jaccard, axes=[0], starts=[0], ends=[1])
+ jaccard1 = fluid.layers.slice(jaccard, axes=[0], starts=[1], ends=[len_gt])
+ jaccard2 = fluid.layers.slice(jaccard, axes=[0], starts=[0], ends=[-1])
+ jaccard = fluid.layers.concat([jaccard0, jaccard1 - jaccard2], axis=0)
+ jaccard = fluid.layers.unsqueeze(jaccard, axes=[1])
+ return jaccard
+
+
+def lovasz_hinge(logits, labels, ignore=None):
+ """
+ Binary Lovasz hinge loss
+ logits: [N, C, H, W] Tensor, logits at each pixel (between -\infty and +\infty)
+ labels: [N, 1, H, W] Tensor, binary ground truth masks (0 or 1)
+    ignore: [N, 1, H, W] Tensor; void-class mask, pixels with value 0 are excluded from the loss
+ """
+ loss = lovasz_hinge_flat(*flatten_binary_scores(logits, labels, ignore))
+ return loss
+
+
+def lovasz_hinge_flat(logits, labels):
+ """
+ Binary Lovasz hinge loss
+ logits: [P] Tensor, logits at each prediction (between -\infty and +\infty)
+ labels: [P] Tensor, binary ground truth labels (0 or 1)
+ """
+ shape = fluid.layers.shape(logits)
+ y = fluid.layers.zeros_like(shape[0])
+
+ out_var = fluid.layers.create_tensor("float32")
+ with fluid.layers.control_flow.Switch() as switch:
+ with switch.case(fluid.layers.equal(shape[0], y)):
+ loss = fluid.layers.reduce_sum(logits) * 0.
+ fluid.layers.assign(input=loss, output=out_var)
+ with switch.case(fluid.layers.greater_than(shape[0], y)):
+ labelsf = fluid.layers.cast(labels, logits.dtype)
+ signs = labelsf * 2 - 1.
+ signs.stop_gradient = True
+ errors = 1.0 - fluid.layers.elementwise_mul(logits, signs)
+ errors_sorted, perm = fluid.layers.argsort(
+ errors, axis=0, descending=True)
+ errors_sorted.stop_gradient = False
+ gt_sorted = fluid.layers.gather(labelsf, perm)
+
+ grad = lovasz_grad(gt_sorted)
+ grad.stop_gradient = True
+ loss = fluid.layers.reduce_sum(
+ fluid.layers.relu(errors_sorted) * grad)
+ fluid.layers.assign(input=loss, output=out_var)
+ return out_var
+
+
+def flatten_binary_scores(scores, labels, ignore=None):
+ """
+ Flattens predictions in the batch (binary case)
+ Remove labels according to 'ignore'
+ """
+ scores = fluid.layers.reshape(scores, [-1, 1])
+ labels = fluid.layers.reshape(labels, [-1, 1])
+ labels.stop_gradient = True
+ if ignore is None:
+ return scores, labels
+ ignore = fluid.layers.cast(ignore, 'int32')
+ ignore_mask = fluid.layers.reshape(ignore, (-1, 1))
+ indexs = fluid.layers.where(ignore_mask == 1)
+ indexs.stop_gradient = True
+ vscores = fluid.layers.gather(scores, indexs[:, 0])
+ vlabels = fluid.layers.gather(labels, indexs[:, 0])
+ return vscores, vlabels
+
+
+def lovasz_softmax(probas, labels, classes='present', ignore=None):
+ """
+ Multi-class Lovasz-Softmax loss
+ probas: [N, C, H, W] Tensor, class probabilities at each prediction (between 0 and 1).
+ labels: [N, 1, H, W] Tensor, ground truth labels (between 0 and C - 1)
+ classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+    ignore: [N, 1, H, W] Tensor; void-class mask, pixels with value 0 are excluded from the loss
+ """
+ vprobas, vlabels = flatten_probas(probas, labels, ignore)
+ loss = lovasz_softmax_flat(vprobas, vlabels, classes=classes)
+ return loss
+
+
+def lovasz_softmax_flat(probas, labels, classes='present'):
+ """
+ Multi-class Lovasz-Softmax loss
+ probas: [P, C] Tensor, class probabilities at each prediction (between 0 and 1)
+ labels: [P] Tensor, ground truth labels (between 0 and C - 1)
+ classes: 'all' for all, 'present' for classes present in labels, or a list of classes to average.
+ """
+ C = probas.shape[1]
+ losses = []
+ present = []
+ classes_to_sum = list(range(C)) if classes in ['all', 'present'
+ ] else classes
+ for c in classes_to_sum:
+ fg = fluid.layers.cast(labels == c, probas.dtype)
+ fg.stop_gradient = True
+ if classes == 'present':
+ present.append(
+ fluid.layers.cast(fluid.layers.reduce_sum(fg) > 0, "int64"))
+ if C == 1:
+ if len(classes_to_sum) > 1:
+ raise ValueError('Sigmoid output possible only with 1 class')
+ class_pred = probas[:, 0]
+ else:
+ class_pred = probas[:, c]
+ errors = fluid.layers.abs(fg - class_pred)
+ errors_sorted, perm = fluid.layers.argsort(
+ errors, axis=0, descending=True)
+ errors_sorted.stop_gradient = False
+
+ fg_sorted = fluid.layers.gather(fg, perm)
+ fg_sorted.stop_gradient = True
+
+ grad = lovasz_grad(fg_sorted)
+ grad.stop_gradient = True
+ loss = fluid.layers.reduce_sum(errors_sorted * grad)
+
+ losses.append(loss)
+
+ if len(classes_to_sum) == 1:
+ return losses[0]
+
+ losses_tensor = fluid.layers.stack(losses)
+ if classes == 'present':
+ present_tensor = fluid.layers.stack(present)
+ index = fluid.layers.where(present_tensor == 1)
+ index.stop_gradient = True
+ losses_tensor = fluid.layers.gather(losses_tensor, index[:, 0])
+ loss = fluid.layers.mean(losses_tensor)
+ return loss
+
+
+def flatten_probas(probas, labels, ignore=None):
+ """
+ Flattens predictions in the batch
+ """
+ if len(probas.shape) == 3:
+ probas = fluid.layers.unsqueeze(probas, axis=[1])
+ C = probas.shape[1]
+ probas = fluid.layers.transpose(probas, [0, 2, 3, 1])
+ probas = fluid.layers.reshape(probas, [-1, C])
+ labels = fluid.layers.reshape(labels, [-1, 1])
+ if ignore is None:
+ return probas, labels
+ ignore = fluid.layers.cast(ignore, 'int32')
+ ignore_mask = fluid.layers.reshape(ignore, [-1, 1])
+ indexs = fluid.layers.where(ignore_mask == 1)
+ indexs.stop_gradient = True
+ vprobas = fluid.layers.gather(probas, indexs[:, 0])
+ vlabels = fluid.layers.gather(labels, indexs[:, 0])
+ return vprobas, vlabels
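+
+
+if __name__ == '__main__':
+    # Build-time smoke test added for illustration (not part of the original
+    # module); it mirrors the `__main__` checks in pdseg/models/modeling/*.py
+    # and only verifies that the loss graphs can be constructed.
+    mask = fluid.data(name='mask', shape=[-1, 1, 64, 64], dtype='int32')
+    # binary case: single-channel logits for lovasz_hinge
+    bin_logits = fluid.data(
+        name='bin_logits', shape=[-1, 1, 64, 64], dtype='float32')
+    bin_label = fluid.data(name='bin_label', shape=[-1, 1, 64, 64], dtype='int32')
+    hinge_loss = lovasz_hinge(bin_logits, bin_label, ignore=mask)
+    # multi-class case: softmax probabilities for lovasz_softmax
+    logits = fluid.data(name='logits', shape=[-1, 4, 64, 64], dtype='float32')
+    label = fluid.data(name='label', shape=[-1, 1, 64, 64], dtype='int32')
+    probas = fluid.layers.softmax(logits, axis=1)
+    softmax_loss = lovasz_softmax(probas, label, ignore=mask)
+    print("lovasz_hinge:", hinge_loss.name, "lovasz_softmax:", softmax_loss.name)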
diff --git a/pdseg/models/backbone/mobilenet_v2.py b/pdseg/models/backbone/mobilenet_v2.py
index ba9c2e7812cb2e19cc839e84b201e45c357cc692..740284b319bd836d9c27682c1c22d556d2b98aa1 100644
--- a/pdseg/models/backbone/mobilenet_v2.py
+++ b/pdseg/models/backbone/mobilenet_v2.py
@@ -308,8 +308,8 @@ def MobileNetV2_scale():
if __name__ == '__main__':
- image_shape = [3, 224, 224]
- image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ image_shape = [-1, 3, 224, 224]
+ image = fluid.data(name='image', shape=image_shape, dtype='float32')
model = MobileNetV2_x1_0()
logit, decode_ends = model.net(image)
#print("logit:", logit.shape)
diff --git a/pdseg/models/backbone/xception.py b/pdseg/models/backbone/xception.py
index 09b356973bdafc21952eaa9c88ab43c861677d57..5c07f240625744356c5df4644342cff6c81af687 100644
--- a/pdseg/models/backbone/xception.py
+++ b/pdseg/models/backbone/xception.py
@@ -311,7 +311,7 @@ def xception_71():
if __name__ == '__main__':
- image_shape = [3, 224, 224]
- image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ image_shape = [-1, 3, 224, 224]
+ image = fluid.data(name='image', shape=image_shape, dtype='float32')
model = xception_65()
logit = model.net(image)
diff --git a/pdseg/models/model_builder.py b/pdseg/models/model_builder.py
index 668d69e44aeb91cc7705a79f092730ae6a1fdb09..864602247f8edad016c82d10b42c3585df7a2490 100644
--- a/pdseg/models/model_builder.py
+++ b/pdseg/models/model_builder.py
@@ -24,6 +24,8 @@ from utils.config import cfg
from loss import multi_softmax_with_loss
from loss import multi_dice_loss
from loss import multi_bce_loss
+from lovasz_losses import lovasz_hinge
+from lovasz_losses import lovasz_softmax
from models.modeling import deeplab, unet, icnet, pspnet, hrnet, fast_scnn
@@ -166,8 +168,8 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
width = cfg.EVAL_CROP_SIZE[0]
height = cfg.EVAL_CROP_SIZE[1]
- image_shape = [cfg.DATASET.DATA_DIM, height, width]
- grt_shape = [1, height, width]
+ image_shape = [-1, cfg.DATASET.DATA_DIM, height, width]
+ grt_shape = [-1, 1, height, width]
class_num = cfg.DATASET.NUM_CLASSES
with fluid.program_guard(main_prog, start_prog):
@@ -175,25 +177,26 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
            # When exporting the model, add image normalization as a preprocessing step to simplify image handling at deployment time
            # At deployment time, the input image only needs a batch_size dimension added
if ModelPhase.is_predict(phase):
- origin_image = fluid.layers.data(
- name='image',
- shape=[-1, -1, -1, cfg.DATASET.DATA_DIM],
- dtype='float32',
- append_batch_size=False)
- image, valid_shape, origin_shape = export_preprocess(
- origin_image)
+ if cfg.SLIM.PREPROCESS:
+ image = fluid.data(
+ name='image', shape=image_shape, dtype='float32')
+ else:
+ origin_image = fluid.data(
+ name='image',
+ shape=[-1, -1, -1, cfg.DATASET.DATA_DIM],
+ dtype='float32')
+ image, valid_shape, origin_shape = export_preprocess(
+ origin_image)
else:
- image = fluid.layers.data(
+ image = fluid.data(
name='image', shape=image_shape, dtype='float32')
- label = fluid.layers.data(
- name='label', shape=grt_shape, dtype='int32')
- mask = fluid.layers.data(
- name='mask', shape=grt_shape, dtype='int32')
+ label = fluid.data(name='label', shape=grt_shape, dtype='int32')
+ mask = fluid.data(name='mask', shape=grt_shape, dtype='int32')
- # use PyReader when doing traning and evaluation
+            # use DataLoader when doing training and evaluation
if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase):
- py_reader = fluid.io.PyReader(
+ data_loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label, mask],
capacity=cfg.DATALOADER.BUF_SIZE,
iterable=False,
@@ -203,19 +206,22 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
if not isinstance(loss_type, list):
loss_type = list(loss_type)
-            # dice_loss and bce_loss only apply to binary segmentation
- if class_num > 2 and (("dice_loss" in loss_type) or
+            # lovasz_hinge_loss, dice_loss and bce_loss only apply to binary segmentation
+ if class_num > 2 and (("lovasz_hinge_loss" in loss_type) or
+ ("dice_loss" in loss_type) or
("bce_loss" in loss_type)):
raise Exception(
- "dice loss and bce loss is only applicable to binary classfication"
+                    "lovasz hinge loss, dice loss and bce loss are only applicable to binary classification."
)
-            # For binary segmentation, when dice_loss or bce_loss is selected, set the final logit output to 1 channel
- if ("dice_loss" in loss_type) or ("bce_loss" in loss_type):
+            # For binary segmentation, when lovasz_hinge_loss, dice_loss or bce_loss is selected, set the final logit output to 1 channel
+ if ("dice_loss" in loss_type) or ("bce_loss" in loss_type) or (
+ "lovasz_hinge_loss" in loss_type):
class_num = 1
- if "softmax_loss" in loss_type:
+ if ("softmax_loss" in loss_type) or (
+ "lovasz_softmax_loss" in loss_type):
raise Exception(
- "softmax loss can not combine with dice loss or bce loss"
+                    "softmax loss or lovasz softmax loss cannot be combined with bce loss, dice loss or lovasz hinge loss."
)
logits = seg_model(image, class_num)
@@ -227,7 +233,8 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
if "softmax_loss" in loss_type:
weight = cfg.SOLVER.CROSS_ENTROPY_WEIGHT
avg_loss_list.append(
- multi_softmax_with_loss(logits, label, mask, class_num, weight))
+ multi_softmax_with_loss(logits, label, mask, class_num,
+ weight))
loss_valid = True
valid_loss.append("softmax_loss")
if "dice_loss" in loss_type:
@@ -238,11 +245,22 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
avg_loss_list.append(multi_bce_loss(logits, label, mask))
loss_valid = True
valid_loss.append("bce_loss")
+ if "lovasz_hinge_loss" in loss_type:
+ avg_loss_list.append(
+ lovasz_hinge(logits, label, ignore=mask))
+ loss_valid = True
+ valid_loss.append("lovasz_hinge_loss")
+ if "lovasz_softmax_loss" in loss_type:
+ probas = fluid.layers.softmax(logits, axis=1)
+ avg_loss_list.append(
+ lovasz_softmax(probas, label, ignore=mask))
+ loss_valid = True
+ valid_loss.append("lovasz_softmax_loss")
if not loss_valid:
raise Exception(
"SOLVER.LOSS: {} is set wrong. it should "
- "include one of (softmax_loss, bce_loss, dice_loss) at least"
- " example: ['softmax_loss'], ['dice_loss'], ['bce_loss', 'dice_loss']"
+                "include at least one of (softmax_loss, bce_loss, dice_loss, lovasz_hinge_loss, lovasz_softmax_loss),"
+                " e.g. ['softmax_loss'], ['dice_loss'], ['bce_loss', 'dice_loss'], ['lovasz_hinge_loss','bce_loss'], ['lovasz_softmax_loss','softmax_loss']"
.format(cfg.SOLVER.LOSS))
invalid_loss = [x for x in loss_type if x not in valid_loss]
@@ -253,7 +271,9 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
avg_loss = 0
for i in range(0, len(avg_loss_list)):
- avg_loss += avg_loss_list[i]
+ loss_name = valid_loss[i].upper()
+            # look up the per-loss weight by attribute name,
+            # e.g. cfg.SOLVER.LOSS_WEIGHT.SOFTMAX_LOSS
+            loss_weight = getattr(cfg.SOLVER.LOSS_WEIGHT, loss_name)
+ avg_loss += loss_weight * avg_loss_list[i]
#get pred result in original size
if isinstance(logits, tuple):
@@ -266,22 +286,26 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
# return image input and logit output for inference graph prune
if ModelPhase.is_predict(phase):
-        # In binary segmentation, dice_loss and bce_loss return single-channel logits; convert them to two channels
+        # In binary segmentation, lovasz_hinge_loss, dice_loss and bce_loss return single-channel logits; convert them to two channels
if class_num == 1:
logit = sigmoid_to_softmax(logit)
else:
logit = softmax(logit)
            # keep only the valid region
- logit = fluid.layers.slice(
- logit, axes=[2, 3], starts=[0, 0], ends=valid_shape)
-
- logit = fluid.layers.resize_bilinear(
- logit,
- out_shape=origin_shape,
- align_corners=False,
- align_mode=0)
- logit = fluid.layers.argmax(logit, axis=1)
+ if cfg.SLIM.PREPROCESS:
+ return image, logit
+
+ else:
+ logit = fluid.layers.slice(
+ logit, axes=[2, 3], starts=[0, 0], ends=valid_shape)
+
+ logit = fluid.layers.resize_bilinear(
+ logit,
+ out_shape=origin_shape,
+ align_corners=False,
+ align_mode=0)
+ logit = fluid.layers.argmax(logit, axis=1)
return origin_image, logit
if class_num == 1:
@@ -300,12 +324,12 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
return pred, logit
if ModelPhase.is_eval(phase):
- return py_reader, avg_loss, pred, label, mask
+ return data_loader, avg_loss, pred, label, mask
if ModelPhase.is_train(phase):
optimizer = solver.Solver(main_prog, start_prog)
decayed_lr = optimizer.optimise(avg_loss)
- return py_reader, avg_loss, decayed_lr, pred, label, mask
+ return data_loader, avg_loss, decayed_lr, pred, label, mask
def to_int(string, dest="I"):
diff --git a/pdseg/models/modeling/hrnet.py b/pdseg/models/modeling/hrnet.py
index 741834e157105b233403772f2672ed60aafc488f..4b95461a1adf5c96cd8737540b3509944ea7900e 100644
--- a/pdseg/models/modeling/hrnet.py
+++ b/pdseg/models/modeling/hrnet.py
@@ -202,7 +202,7 @@ def hrnet(input, num_classes):
return logit
if __name__ == '__main__':
- image_shape = [3, 769, 769]
- image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ image_shape = [-1, 3, 769, 769]
+ image = fluid.data(name='image', shape=image_shape, dtype='float32')
logit = hrnet(image, 4)
print("logit:", logit.shape)
diff --git a/pdseg/models/modeling/icnet.py b/pdseg/models/modeling/icnet.py
index 354468c9efbe3d3429845f7605927556ef3b505a..f6364ff282b3feea2b2bebe796d34434f59b18ca 100644
--- a/pdseg/models/modeling/icnet.py
+++ b/pdseg/models/modeling/icnet.py
@@ -191,7 +191,7 @@ def icnet(input, num_classes):
if __name__ == '__main__':
- image_shape = [3, 320, 320]
- image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ image_shape = [-1, 3, 320, 320]
+ image = fluid.data(name='image', shape=image_shape, dtype='float32')
logit = icnet(image, 4)
print("logit:", logit.shape)
diff --git a/pdseg/models/modeling/unet.py b/pdseg/models/modeling/unet.py
index 0f613a83b7f468cf1573b4f57f36e8e51fefb7ac..215737aa658bcd4046ecaa77bb04fd11a329be24 100644
--- a/pdseg/models/modeling/unet.py
+++ b/pdseg/models/modeling/unet.py
@@ -129,7 +129,7 @@ def unet(input, num_classes):
if __name__ == '__main__':
- image_shape = [3, 320, 320]
- image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
+ image_shape = [-1, 3, 320, 320]
+ image = fluid.data(name='image', shape=image_shape, dtype='float32')
logit = unet(image, 4)
print("logit:", logit.shape)
diff --git a/pdseg/train.py b/pdseg/train.py
index 8254f1655c97c09204d2e4a64e2404907270fcfc..9e30c0f2050bd4987d84675985a86922e1c993c3 100644
--- a/pdseg/train.py
+++ b/pdseg/train.py
@@ -103,7 +103,7 @@ def parse_args():
help='If set True, enable continuous evaluation job.'
'This flag is only used for internal test.',
action='store_true')
-
+
# NOTE: This for benchmark
parser.add_argument(
'--is_profiler',
@@ -114,7 +114,7 @@ def parse_args():
'--profiler_path',
help='the profiler output file path.(used for benchmark)',
default='./seg.profiler',
- type=str)
+ type=str)
return parser.parse_args()
@@ -265,9 +265,9 @@ def train(cfg):
batch_size_per_dev = cfg.BATCH_SIZE // dev_count
print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
- py_reader, avg_loss, lr, pred, grts, masks = build_model(
+ data_loader, avg_loss, lr, pred, grts, masks = build_model(
train_prog, startup_prog, phase=ModelPhase.TRAIN)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
exe = fluid.Executor(place)
@@ -386,7 +386,7 @@ def train(cfg):
print_info("Use multi-thread reader")
for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
- py_reader.start()
+ data_loader.start()
while True:
try:
if args.debug:
@@ -454,16 +454,16 @@ def train(cfg):
sys.stdout.flush()
avg_loss = 0.0
timer.restart()
-
+
# NOTE : used for benchmark, profiler tools
- if args.is_profiler and epoch == 1 and global_step == args.log_steps:
+ if args.is_profiler and epoch == 1 and global_step == args.log_steps:
profiler.start_profiler("All")
elif args.is_profiler and epoch == 1 and global_step == args.log_steps + 5:
profiler.stop_profiler("total", args.profiler_path)
return
except fluid.core.EOFException:
- py_reader.reset()
+ data_loader.reset()
break
except Exception as e:
print(e)
diff --git a/pdseg/utils/config.py b/pdseg/utils/config.py
index c3d84216752838a388fd2cda1946949d77960fb9..141b17ce24df1f78310975ef236290011ebffb56 100644
--- a/pdseg/utils/config.py
+++ b/pdseg/utils/config.py
@@ -155,10 +155,16 @@ cfg.SOLVER.BEGIN_EPOCH = 1
cfg.SOLVER.NUM_EPOCHS = 30
# Loss selection; supports softmax_loss, bce_loss, dice_loss
cfg.SOLVER.LOSS = ["softmax_loss"]
-# Whether to enable the warmup learning-rate strategy
-cfg.SOLVER.LR_WARMUP = False
+# Loss weights used for weighted combinations of multiple losses; only effective for losses listed in SOLVER.LOSS
+cfg.SOLVER.LOSS_WEIGHT.SOFTMAX_LOSS = 1
+cfg.SOLVER.LOSS_WEIGHT.DICE_LOSS = 1
+cfg.SOLVER.LOSS_WEIGHT.BCE_LOSS = 1
+cfg.SOLVER.LOSS_WEIGHT.LOVASZ_HINGE_LOSS = 1
+cfg.SOLVER.LOSS_WEIGHT.LOVASZ_SOFTMAX_LOSS = 1
+# Whether to enable the warmup learning-rate strategy
+cfg.SOLVER.LR_WARMUP = False
# Number of warmup iterations
-cfg.SOLVER.LR_WARMUP_STEPS = 2000
+cfg.SOLVER.LR_WARMUP_STEPS = 2000
# cross entropy weight; default None. If set to 'dynamic', class weights are
# adjusted dynamically according to per-class counts in each batch.
# A static weight list can also be given, e.g. [0.1, 2.0, 0.9] for 3 classes
@@ -228,7 +234,6 @@ cfg.MODEL.HRNET.STAGE3.NUM_CHANNELS = [40, 80, 160]
cfg.MODEL.HRNET.STAGE4.NUM_MODULES = 3
cfg.MODEL.HRNET.STAGE4.NUM_CHANNELS = [40, 80, 160, 320]
-
########################## Inference/deployment model configuration #########
# Saved model filename for inference
cfg.FREEZE.MODEL_FILENAME = '__model__'
@@ -251,4 +256,4 @@ cfg.SLIM.NAS_SPACE_NAME = ""
cfg.SLIM.PRUNE_PARAMS = ''
cfg.SLIM.PRUNE_RATIOS = []
-
+cfg.SLIM.PREPROCESS = False
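
The new `SOLVER.LOSS_WEIGHT` keys weight the individual terms when several losses are combined, and they only take effect for losses actually listed in `SOLVER.LOSS`. A hedged sketch of a two-loss setup (values are illustrative; run with `pdseg/` on the import path):

```python
from utils.config import cfg  # pdseg/utils/config.py

# Combine two binary-segmentation losses with equal weight.
cfg.SOLVER.LOSS = ["lovasz_hinge_loss", "bce_loss"]
cfg.SOLVER.LOSS_WEIGHT.LOVASZ_HINGE_LOSS = 0.5
cfg.SOLVER.LOSS_WEIGHT.BCE_LOSS = 0.5
```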
diff --git a/slim/distillation/README.md b/slim/distillation/README.md
index 2bd772a1001e11efa89324315fa32d44032ade05..d7af90beb3a7fd4fa6bb3775d45b0fd6aadc0133 100644
--- a/slim/distillation/README.md
+++ b/slim/distillation/README.md
@@ -89,7 +89,6 @@ python -m paddle.distributed.launch ./slim/distillation/train_distill.py \
--log_steps 10 --cfg ./slim/distillation/cityscape.yaml \
--teacher_cfg ./slim/distillation/cityscape_teacher.yaml \
--use_gpu \
---use_mpio \
--do_eval
```
diff --git a/slim/distillation/model_builder.py b/slim/distillation/model_builder.py
index f903b8dd2b635fa10070dcc3da488be66746d539..90f53992478e27c94fc8a7817931d3a46e0bb108 100644
--- a/slim/distillation/model_builder.py
+++ b/slim/distillation/model_builder.py
@@ -156,7 +156,10 @@ def export_preprocess(image):
return image, valid_shape, origin_shape
-def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwargs):
+def build_model(main_prog=None,
+ start_prog=None,
+ phase=ModelPhase.TRAIN,
+ **kwargs):
if not ModelPhase.is_valid_phase(phase):
raise ValueError("ModelPhase {} is not valid!".format(phase))
@@ -167,8 +170,8 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
width = cfg.EVAL_CROP_SIZE[0]
height = cfg.EVAL_CROP_SIZE[1]
- image_shape = [cfg.DATASET.DATA_DIM, height, width]
- grt_shape = [1, height, width]
+ image_shape = [-1, cfg.DATASET.DATA_DIM, height, width]
+ grt_shape = [-1, 1, height, width]
class_num = cfg.DATASET.NUM_CLASSES
#with fluid.program_guard(main_prog, start_prog):
@@ -176,36 +179,30 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
# 在导出模型的时候,增加图像标准化预处理,减小预测部署时图像的处理流程
# 预测部署时只须对输入图像增加batch_size维度即可
if cfg.SLIM.KNOWLEDGE_DISTILL_IS_TEACHER:
- image = main_prog.global_block()._clone_variable(kwargs['image'],
- force_persistable=False)
- label = main_prog.global_block()._clone_variable(kwargs['label'],
- force_persistable=False)
- mask = main_prog.global_block()._clone_variable(kwargs['mask'],
- force_persistable=False)
+ image = main_prog.global_block()._clone_variable(
+ kwargs['image'], force_persistable=False)
+ label = main_prog.global_block()._clone_variable(
+ kwargs['label'], force_persistable=False)
+ mask = main_prog.global_block()._clone_variable(
+ kwargs['mask'], force_persistable=False)
else:
if ModelPhase.is_predict(phase):
- origin_image = fluid.layers.data(
+ origin_image = fluid.data(
name='image',
shape=[-1, -1, -1, cfg.DATASET.DATA_DIM],
- dtype='float32',
- append_batch_size=False)
- image, valid_shape, origin_shape = export_preprocess(
- origin_image)
+ dtype='float32')
+ image, valid_shape, origin_shape = export_preprocess(origin_image)
else:
- image = fluid.layers.data(
- name='image', shape=image_shape, dtype='float32')
- label = fluid.layers.data(
- name='label', shape=grt_shape, dtype='int32')
- mask = fluid.layers.data(
- name='mask', shape=grt_shape, dtype='int32')
+ image = fluid.data(name='image', shape=image_shape, dtype='float32')
+ label = fluid.data(name='label', shape=grt_shape, dtype='int32')
+ mask = fluid.data(name='mask', shape=grt_shape, dtype='int32')
-
- # use PyReader when doing traning and evaluation
+ # use DataLoader.from_generator when doing training and evaluation
if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase):
- py_reader = None
+ data_loader = None
if not cfg.SLIM.KNOWLEDGE_DISTILL_IS_TEACHER:
- py_reader = fluid.io.PyReader(
+ data_loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label, mask],
capacity=cfg.DATALOADER.BUF_SIZE,
iterable=False,
@@ -219,16 +216,14 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
if class_num > 2 and (("dice_loss" in loss_type) or
("bce_loss" in loss_type)):
raise Exception(
- "dice loss and bce loss is only applicable to binary classfication"
- )
+ "dice loss and bce loss is only applicable to binary classfication")
# 在两类分割情况下,当loss函数选择dice_loss或bce_loss的时候,最后logit输出通道数设置为1
if ("dice_loss" in loss_type) or ("bce_loss" in loss_type):
class_num = 1
if "softmax_loss" in loss_type:
raise Exception(
- "softmax loss can not combine with dice loss or bce loss"
- )
+ "softmax loss can not combine with dice loss or bce loss")
logits = seg_model(image, class_num)
# 根据选择的loss函数计算相应的损失函数
@@ -289,10 +284,7 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
logit, axes=[2, 3], starts=[0, 0], ends=valid_shape)
logit = fluid.layers.resize_bilinear(
- logit,
- out_shape=origin_shape,
- align_corners=False,
- align_mode=0)
+ logit, out_shape=origin_shape, align_corners=False, align_mode=0)
logit = fluid.layers.argmax(logit, axis=1)
return origin_image, logit
@@ -312,7 +304,7 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
return pred, logit
if ModelPhase.is_eval(phase):
- return py_reader, avg_loss, pred, label, mask
+ return data_loader, avg_loss, pred, label, mask
if ModelPhase.is_train(phase):
decayed_lr = None
@@ -321,7 +313,7 @@ def build_model(main_prog=None, start_prog=None, phase=ModelPhase.TRAIN, **kwarg
decayed_lr = optimizer.optimise(avg_loss)
# optimizer = solver.Solver(main_prog, start_prog)
# decayed_lr = optimizer.optimise(avg_loss)
- return py_reader, avg_loss, decayed_lr, pred, label, mask, image
+ return data_loader, avg_loss, decayed_lr, pred, label, mask, image
def to_int(string, dest="I"):
diff --git a/slim/distillation/train_distill.py b/slim/distillation/train_distill.py
index c1e23253ffcde9eea034bd7f67906ca9e534d2e2..e354107f173eea203d9df3f01f93fae62f41eabc 100644
--- a/slim/distillation/train_distill.py
+++ b/slim/distillation/train_distill.py
@@ -48,6 +48,7 @@ from utils import dist_utils
import solver
from paddleslim.dist.single_distiller import merge, l2_loss
+
def parse_args():
parser = argparse.ArgumentParser(description='PaddleSeg training')
parser.add_argument(
@@ -260,8 +261,9 @@ def train(cfg):
batch_size_per_dev = cfg.BATCH_SIZE // dev_count
print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
- py_reader, loss, lr, pred, grts, masks, image = build_model(phase=ModelPhase.TRAIN)
- py_reader.decorate_sample_generator(
+ data_loader, loss, lr, pred, grts, masks, image = build_model(
+ phase=ModelPhase.TRAIN)
+ data_loader.set_sample_generator(
data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
exe = fluid.Executor(place)
@@ -274,8 +276,12 @@ def train(cfg):
with fluid.program_guard(teacher_program, teacher_startup_program):
with fluid.unique_name.guard():
_, teacher_loss, _, _, _, _, _ = build_model(
- teacher_program, teacher_startup_program, phase=ModelPhase.TRAIN, image=image,
- label=grts, mask=masks)
+ teacher_program,
+ teacher_startup_program,
+ phase=ModelPhase.TRAIN,
+ image=image,
+ label=grts,
+ mask=masks)
exe.run(teacher_startup_program)
@@ -293,7 +299,9 @@ def train(cfg):
'mask': 'mask',
}
merge(teacher_program, fluid.default_main_program(), data_name_map, place)
- distill_pairs = [['teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0']]
+ distill_pairs = [[
+ 'teacher_bilinear_interp_2.tmp_0', 'bilinear_interp_0.tmp_0'
+ ]]
def distill(pairs, weight):
"""
@@ -322,7 +330,8 @@ def train(cfg):
build_strategy.fuse_all_optimizer_ops = False
build_strategy.fuse_elewise_add_act_ops = True
if cfg.NUM_TRAINERS > 1 and args.use_gpu:
- dist_utils.prepare_for_multi_process(exe, build_strategy, fluid.default_main_program())
+ dist_utils.prepare_for_multi_process(exe, build_strategy,
+ fluid.default_main_program())
exec_strategy.num_threads = 1
if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
@@ -334,10 +343,11 @@ def train(cfg):
print_info(
"Sync BatchNorm strategy will not be effective if GPU device"
" count <= 1")
- compiled_train_prog = fluid.CompiledProgram(fluid.default_main_program()).with_data_parallel(
- loss_name=all_loss.name,
- exec_strategy=exec_strategy,
- build_strategy=build_strategy)
+ compiled_train_prog = fluid.CompiledProgram(
+ fluid.default_main_program()).with_data_parallel(
+ loss_name=all_loss.name,
+ exec_strategy=exec_strategy,
+ build_strategy=build_strategy)
# Resume training
begin_epoch = cfg.SOLVER.BEGIN_EPOCH
@@ -387,7 +397,9 @@ def train(cfg):
format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
#fetch_list = [avg_loss.name, lr.name]
- fetch_list = [loss.name, 'teacher_' + teacher_loss.name, distill_loss.name, lr.name]
+ fetch_list = [
+ loss.name, 'teacher_' + teacher_loss.name, distill_loss.name, lr.name
+ ]
if args.debug:
# Fetch more variable info and use streaming confusion matrix to
@@ -431,7 +443,7 @@ def train(cfg):
print_info("Use multi-thread reader")
for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
- py_reader.start()
+ data_loader.start()
while True:
try:
if args.debug:
@@ -491,7 +503,8 @@ def train(cfg):
speed = args.log_steps / timer.elapsed_time()
print((
"epoch={} step={} lr={:.5f} loss={:.4f} teacher loss={:.4f} distill loss={:.4f} step/sec={:.3f} | ETA {}"
- ).format(epoch, global_step, lr[0], avg_loss, avg_t_loss, avg_d_loss, speed,
+ ).format(epoch, global_step, lr[0], avg_loss,
+ avg_t_loss, avg_d_loss, speed,
calculate_eta(all_step - global_step, speed)))
if args.use_tb:
log_writer.add_scalar('Train/loss', avg_loss,
@@ -507,7 +520,7 @@ def train(cfg):
timer.restart()
except fluid.core.EOFException:
- py_reader.reset()
+ data_loader.reset()
break
except Exception as e:
print(e)
diff --git a/slim/nas/README.md b/slim/nas/README.md
index cddfc5a82f07ab0b3f2e2acad6a4c0f7b2ed650c..31e8f93f608002504cdaeaed940e4b41c138e00c 100644
--- a/slim/nas/README.md
+++ b/slim/nas/README.md
@@ -46,7 +46,7 @@ SLIM:
## 训练与评估
执行以下命令,边训练边评估
```shell
-CUDA_VISIBLE_DEVICES=0 python -u ./slim/nas/train_nas.py --log_steps 10 --cfg configs/deeplabv3p_mobilenetv2_cityscapes.yaml --use_gpu --use_mpio \
+CUDA_VISIBLE_DEVICES=0 python -u ./slim/nas/train_nas.py --log_steps 10 --cfg configs/deeplabv3p_mobilenetv2_cityscapes.yaml --use_gpu \
SLIM.NAS_PORT 23333 \
SLIM.NAS_ADDRESS "" \
SLIM.NAS_SEARCH_STEPS 2 \
diff --git a/slim/nas/eval_nas.py b/slim/nas/eval_nas.py
index 08f75f5d8ee8d6afbcf9b038e4f8dcf0237a5b56..7f8663dffafb49d7c372f3eeaf0d3ed074f7ce9b 100644
--- a/slim/nas/eval_nas.py
+++ b/slim/nas/eval_nas.py
@@ -45,6 +45,7 @@ from metrics import ConfusionMatrix
from mobilenetv2_search_space import MobileNetV2SpaceSeg
+
def parse_args():
parser = argparse.ArgumentParser(description='PaddleSeg model evalution')
parser.add_argument(
@@ -98,10 +99,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
for b in data_gen:
yield b[0], b[1], b[2]
- py_reader, avg_loss, pred, grts, masks = build_model(
+ data_loader, avg_loss, pred, grts, masks = build_model(
test_prog, startup_prog, phase=ModelPhase.EVAL, arch=kwargs['arch'])
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
# Get device environment
@@ -134,7 +135,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
timer = Timer()
timer.start()
- py_reader.start()
+ data_loader.start()
while True:
try:
step += 1
diff --git a/slim/nas/model_builder.py b/slim/nas/model_builder.py
index 3dfbacb0cd41a14bb81c6f6c82b81479fb1c30c8..27a14fa77970cad18e017dc825f1708ceb2c9c75 100644
--- a/slim/nas/model_builder.py
+++ b/slim/nas/model_builder.py
@@ -74,9 +74,7 @@ def seg_model(image, class_num, arch):
if model_name == 'deeplabv3p':
logits = deeplab.deeplabv3p_nas(image, class_num, arch)
else:
- raise Exception(
- "unknow model name, only support deeplabv3p"
- )
+ raise Exception("unknow model name, only support deeplabv3p")
return logits
@@ -156,8 +154,8 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN, arch=None):
width = cfg.EVAL_CROP_SIZE[0]
height = cfg.EVAL_CROP_SIZE[1]
- image_shape = [cfg.DATASET.DATA_DIM, height, width]
- grt_shape = [1, height, width]
+ image_shape = [-1, cfg.DATASET.DATA_DIM, height, width]
+ grt_shape = [-1, 1, height, width]
class_num = cfg.DATASET.NUM_CLASSES
with fluid.program_guard(main_prog, start_prog):
@@ -165,25 +163,22 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN, arch=None):
# 在导出模型的时候,增加图像标准化预处理,减小预测部署时图像的处理流程
# 预测部署时只须对输入图像增加batch_size维度即可
if ModelPhase.is_predict(phase):
- origin_image = fluid.layers.data(
+ origin_image = fluid.data(
name='image',
shape=[-1, -1, -1, cfg.DATASET.DATA_DIM],
- dtype='float32',
- append_batch_size=False)
+ dtype='float32')
image, valid_shape, origin_shape = export_preprocess(
origin_image)
else:
- image = fluid.layers.data(
+ image = fluid.data(
name='image', shape=image_shape, dtype='float32')
- label = fluid.layers.data(
- name='label', shape=grt_shape, dtype='int32')
- mask = fluid.layers.data(
- name='mask', shape=grt_shape, dtype='int32')
+ label = fluid.data(name='label', shape=grt_shape, dtype='int32')
+ mask = fluid.data(name='mask', shape=grt_shape, dtype='int32')
- # use PyReader when doing traning and evaluation
+ # use DataLoader.from_generator when doing training and evaluation
if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase):
- py_reader = fluid.io.PyReader(
+ data_loader = fluid.io.DataLoader.from_generator(
feed_list=[image, label, mask],
capacity=cfg.DATALOADER.BUF_SIZE,
iterable=False,
@@ -217,7 +212,8 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN, arch=None):
if "softmax_loss" in loss_type:
weight = cfg.SOLVER.CROSS_ENTROPY_WEIGHT
avg_loss_list.append(
- multi_softmax_with_loss(logits, label, mask, class_num, weight))
+ multi_softmax_with_loss(logits, label, mask, class_num,
+ weight))
loss_valid = True
valid_loss.append("softmax_loss")
if "dice_loss" in loss_type:
@@ -290,12 +286,12 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN, arch=None):
return pred, logit
if ModelPhase.is_eval(phase):
- return py_reader, avg_loss, pred, label, mask
+ return data_loader, avg_loss, pred, label, mask
if ModelPhase.is_train(phase):
optimizer = solver.Solver(main_prog, start_prog)
decayed_lr = optimizer.optimise(avg_loss)
- return py_reader, avg_loss, decayed_lr, pred, label, mask
+ return data_loader, avg_loss, decayed_lr, pred, label, mask
def to_int(string, dest="I"):
diff --git a/slim/nas/train_nas.py b/slim/nas/train_nas.py
index 7822657fa264d053360199d5691098ae85fcd12c..6ab4d899dc2406275daf3fecd3738fb4b3b82c49 100644
--- a/slim/nas/train_nas.py
+++ b/slim/nas/train_nas.py
@@ -54,6 +54,7 @@ from paddleslim.analysis import flops
from paddleslim.nas.sa_nas import SANAS
from paddleslim.nas import search_space
+
def parse_args():
parser = argparse.ArgumentParser(description='PaddleSeg training')
parser.add_argument(
@@ -269,21 +270,24 @@ def train(cfg):
port = cfg.SLIM.NAS_PORT
server_address = (cfg.SLIM.NAS_ADDRESS, port)
- sa_nas = SANAS(config, server_addr=server_address, search_steps=cfg.SLIM.NAS_SEARCH_STEPS,
- is_server=cfg.SLIM.NAS_IS_SERVER)
+ sa_nas = SANAS(
+ config,
+ server_addr=server_address,
+ search_steps=cfg.SLIM.NAS_SEARCH_STEPS,
+ is_server=cfg.SLIM.NAS_IS_SERVER)
for step in range(cfg.SLIM.NAS_SEARCH_STEPS):
arch = sa_nas.next_archs()[0]
start_prog = fluid.Program()
train_prog = fluid.Program()
- py_reader, avg_loss, lr, pred, grts, masks = build_model(
+ data_loader, avg_loss, lr, pred, grts, masks = build_model(
train_prog, start_prog, arch=arch, phase=ModelPhase.TRAIN)
cur_flops = flops(train_prog)
print('current step:', step, 'flops:', cur_flops)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
exe = fluid.Executor(place)
@@ -297,7 +301,8 @@ def train(cfg):
build_strategy = fluid.BuildStrategy()
if cfg.NUM_TRAINERS > 1 and args.use_gpu:
- dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog)
+ dist_utils.prepare_for_multi_process(exe, build_strategy,
+ train_prog)
exec_strategy.num_threads = 1
if cfg.TRAIN.SYNC_BATCH_NORM and args.use_gpu:
@@ -309,10 +314,11 @@ def train(cfg):
print_info(
"Sync BatchNorm strategy will not be effective if GPU device"
" count <= 1")
- compiled_train_prog = fluid.CompiledProgram(train_prog).with_data_parallel(
- loss_name=avg_loss.name,
- exec_strategy=exec_strategy,
- build_strategy=build_strategy)
+ compiled_train_prog = fluid.CompiledProgram(
+ train_prog).with_data_parallel(
+ loss_name=avg_loss.name,
+ exec_strategy=exec_strategy,
+ build_strategy=build_strategy)
# Resume training
begin_epoch = cfg.SOLVER.BEGIN_EPOCH
@@ -353,13 +359,14 @@ def train(cfg):
print_info(
"Parameter[{}] don't exist or shape does not match current network, skip"
" to load it.".format(var.name))
- print_info("{}/{} pretrained parameters loaded successfully!".format(
- len(load_vars),
- len(load_vars) + len(load_fail_vars)))
+ print_info(
+ "{}/{} pretrained parameters loaded successfully!".format(
+ len(load_vars),
+ len(load_vars) + len(load_fail_vars)))
else:
print_info(
'Pretrained model dir {} not exists, training from scratch...'.
- format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
+ format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
fetch_list = [avg_loss.name, lr.name]
@@ -374,8 +381,8 @@ def train(cfg):
timer.start()
if begin_epoch > cfg.SOLVER.NUM_EPOCHS:
raise ValueError(
- ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]").format(
- begin_epoch, cfg.SOLVER.NUM_EPOCHS))
+ ("begin epoch[{}] is larger than cfg.SOLVER.NUM_EPOCHS[{}]"
+ ).format(begin_epoch, cfg.SOLVER.NUM_EPOCHS))
if args.use_mpio:
print_info("Use multiprocess reader")
@@ -384,7 +391,7 @@ def train(cfg):
best_miou = 0.0
for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
- py_reader.start()
+ data_loader.start()
while True:
try:
loss, lr = exe.run(
@@ -398,21 +405,22 @@ def train(cfg):
avg_loss /= args.log_steps
speed = args.log_steps / timer.elapsed_time()
print((
- "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
- ).format(epoch, global_step, lr[0], avg_loss, speed,
- calculate_eta(all_step - global_step, speed)))
+ "epoch={} step={} lr={:.5f} loss={:.4f} step/sec={:.3f} | ETA {}"
+ ).format(epoch, global_step, lr[0], avg_loss, speed,
+ calculate_eta(all_step - global_step, speed)))
sys.stdout.flush()
avg_loss = 0.0
timer.restart()
except fluid.core.EOFException:
- py_reader.reset()
+ data_loader.reset()
break
except Exception as e:
print(e)
if epoch > cfg.SLIM.NAS_START_EVAL_EPOCH:
- ckpt_dir = save_checkpoint(exe, train_prog, '{}_tmp'.format(port))
+ ckpt_dir = save_checkpoint(exe, train_prog,
+ '{}_tmp'.format(port))
_, mean_iou, _, mean_acc = evaluate(
cfg=cfg,
arch=arch,
@@ -420,7 +428,8 @@ def train(cfg):
use_gpu=args.use_gpu,
use_mpio=args.use_mpio)
if best_miou < mean_iou:
- print('search step {}, epoch {} best iou {}'.format(step, epoch, mean_iou))
+ print('search step {}, epoch {} best iou {}'.format(
+ step, epoch, mean_iou))
best_miou = mean_iou
sa_nas.reward(float(best_miou))
diff --git a/slim/prune/README.md b/slim/prune/README.md
index b6a45238938567a845b44ff768db6982bfeab55c..25505606e3fcc8c8e7c6beba68cdb8d39c1c56b1 100644
--- a/slim/prune/README.md
+++ b/slim/prune/README.md
@@ -46,7 +46,7 @@ SLIM.PRUNE_RATIOS '[0.1,0.1,0.1]'
```shell
CUDA_VISIBLE_DEVICES=0
-python -u ./slim/prune/eval_prune.py --cfg configs/cityscape_fast_scnn.yaml --use_gpu --use_mpio \
+python -u ./slim/prune/eval_prune.py --cfg configs/cityscape_fast_scnn.yaml --use_gpu \
TEST.TEST_MODEL your_trained_model \
```
diff --git a/slim/prune/eval_prune.py b/slim/prune/eval_prune.py
index b8275d03475b8fea67d73682b54a38172fbc25e2..3bfb4f4cf2772da0e6122ec6f6660d90a23c71e2 100644
--- a/slim/prune/eval_prune.py
+++ b/slim/prune/eval_prune.py
@@ -45,6 +45,7 @@ from metrics import ConfusionMatrix
from paddleslim.prune import load_model
+
def parse_args():
parser = argparse.ArgumentParser(description='PaddleSeg model evalution')
parser.add_argument(
@@ -98,10 +99,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
for b in data_gen:
yield b[0], b[1], b[2]
- py_reader, avg_loss, pred, grts, masks = build_model(
+ data_loader, avg_loss, pred, grts, masks = build_model(
test_prog, startup_prog, phase=ModelPhase.EVAL)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
# Get device environment
@@ -134,7 +135,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
timer = Timer()
timer.start()
- py_reader.start()
+ data_loader.start()
while True:
try:
step += 1
diff --git a/slim/prune/train_prune.py b/slim/prune/train_prune.py
index 06e1658f1a3f721842fbe780820103aceac87a16..05c599e3327728ee1ef5e3f2dea359ab9dab5834 100644
--- a/slim/prune/train_prune.py
+++ b/slim/prune/train_prune.py
@@ -50,6 +50,7 @@ from utils import dist_utils
from paddleslim.prune import Pruner, save_model
from paddleslim.analysis import flops
+
def parse_args():
parser = argparse.ArgumentParser(description='PaddleSeg training')
parser.add_argument(
@@ -181,10 +182,12 @@ def load_checkpoint(exe, program):
return begin_epoch
+
def print_info(*msg):
if cfg.TRAINER_ID == 0:
print(*msg)
+
def train(cfg):
startup_prog = fluid.Program()
train_prog = fluid.Program()
@@ -236,9 +239,9 @@ def train(cfg):
batch_size_per_dev = cfg.BATCH_SIZE // dev_count
print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
- py_reader, avg_loss, lr, pred, grts, masks = build_model(
+ data_loader, avg_loss, lr, pred, grts, masks = build_model(
train_prog, startup_prog, phase=ModelPhase.TRAIN)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
exe = fluid.Executor(place)
@@ -261,8 +264,9 @@ def train(cfg):
print_info("Sync BatchNorm strategy is effective.")
build_strategy.sync_batch_norm = True
else:
- print_info("Sync BatchNorm strategy will not be effective if GPU device"
- " count <= 1")
+ print_info(
+ "Sync BatchNorm strategy will not be effective if GPU device"
+ " count <= 1")
pruned_params = cfg.SLIM.PRUNE_PARAMS.strip().split(',')
pruned_ratios = cfg.SLIM.PRUNE_RATIOS
@@ -311,14 +315,16 @@ def train(cfg):
for var in load_vars:
print_info("Parameter[{}] loaded sucessfully!".format(var.name))
for var in load_fail_vars:
- print_info("Parameter[{}] don't exist or shape does not match current network, skip"
- " to load it.".format(var.name))
+ print_info(
+ "Parameter[{}] doesn't exist or its shape does not match the current network;"
+ " skipping it.".format(var.name))
print_info("{}/{} pretrained parameters loaded successfully!".format(
len(load_vars),
len(load_vars) + len(load_fail_vars)))
else:
- print_info('Pretrained model dir {} not exists, training from scratch...'.
- format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
+ print_info(
+ 'Pretrained model dir {} does not exist, training from scratch...'.
+ format(cfg.TRAIN.PRETRAINED_MODEL_DIR))
fetch_list = [avg_loss.name, lr.name]
if args.debug:
@@ -371,7 +377,7 @@ def train(cfg):
print_info("Use multi-thread reader")
for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
- py_reader.start()
+ data_loader.start()
while True:
try:
if args.debug:
@@ -441,7 +447,7 @@ def train(cfg):
timer.restart()
except fluid.core.EOFException:
- py_reader.reset()
+ data_loader.reset()
break
except Exception as e:
print(e)
@@ -477,6 +483,7 @@ def train(cfg):
if cfg.TRAINER_ID == 0:
save_prune_checkpoint(exe, train_prog, 'final')
+
def main(args):
if args.cfg_file is not None:
cfg.update_from_file(args.cfg_file)
diff --git a/slim/quantization/README.md b/slim/quantization/README.md
index 9af04033b3a9af84d4b1fdf081f156be6f8dc0c2..28a74e01a64b81d2dc9e6d022a6fab40ed3866f9 100644
--- a/slim/quantization/README.md
+++ b/slim/quantization/README.md
@@ -133,7 +133,20 @@ TRAIN.SYNC_BATCH_NORM False \
BATCH_SIZE 16 \
```
+## Export the model
+Use the script [slim/quantization/export_model.py](./export_model.py) to export the model.
+Export command:
+
+Run the following from the root directory of the segmentation library:
+```
+python -u ./slim/quantization/export_model.py --not_quant_pattern last_conv --cfg configs/deeplabv3p_mobilenetv2_cityscapes.yaml \
+TEST.TEST_MODEL "./snapshots/mobilenetv2_quant/best_model" \
+MODEL.DEEPLAB.ENCODER_WITH_ASPP False \
+MODEL.DEEPLAB.ENABLE_DECODER False \
+TRAIN.SYNC_BATCH_NORM False \
+SLIM.PREPROCESS True \
+```
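
Once exported, the model under `FREEZE.SAVE_DIR` can be loaded back for a quick smoke test with Paddle's standard inference API. A minimal sketch, assuming the default `FREEZE` settings from `pdseg/utils/config.py` (directory `freeze_model`, filenames `__model__` / `__params__`):

```python
import paddle.fluid as fluid

exe = fluid.Executor(fluid.CPUPlace())
program, feed_names, fetch_targets = fluid.io.load_inference_model(
    dirname='freeze_model',        # cfg.FREEZE.SAVE_DIR
    executor=exe,
    model_filename='__model__',    # cfg.FREEZE.MODEL_FILENAME
    params_filename='__params__')  # cfg.FREEZE.PARAMS_FILENAME
print(feed_names)  # expected: ['image']
```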
## 量化结果
diff --git a/slim/quantization/eval_quant.py b/slim/quantization/eval_quant.py
index f40021df10ac5cabee789ca4de04b7489b37f182..fdf6f3ce18444f85c157a301334aabfdf47869e7 100644
--- a/slim/quantization/eval_quant.py
+++ b/slim/quantization/eval_quant.py
@@ -105,10 +105,10 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
for b in data_gen:
yield b[0], b[1], b[2]
- py_reader, avg_loss, pred, grts, masks = build_model(
+ data_loader, avg_loss, pred, grts, masks = build_model(
test_prog, startup_prog, phase=ModelPhase.EVAL)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, drop_last=False, batch_size=cfg.BATCH_SIZE)
# Get device environment
@@ -152,7 +152,7 @@ def evaluate(cfg, ckpt_dir=None, use_gpu=False, use_mpio=False, **kwargs):
all_step = cfg.DATASET.TEST_TOTAL_IMAGES // cfg.BATCH_SIZE + 1
timer = Timer()
timer.start()
- py_reader.start()
+ data_loader.start()
while True:
try:
step += 1
diff --git a/slim/quantization/export_model.py b/slim/quantization/export_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..3891254bd18470c2af94cb74fe8e06e942b86cb5
--- /dev/null
+++ b/slim/quantization/export_model.py
@@ -0,0 +1,149 @@
+# coding: utf8
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import sys
+import time
+import pprint
+import cv2
+import argparse
+import numpy as np
+import paddle.fluid as fluid
+
+from utils.config import cfg
+from models.model_builder import build_model
+from models.model_builder import ModelPhase
+from paddleslim.quant import quant_aware, convert
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(
+ description='PaddleSeg Inference Model Exporter')
+ parser.add_argument(
+ '--cfg',
+ dest='cfg_file',
+ help='Config file for training (and optionally testing)',
+ default=None,
+ type=str)
+ parser.add_argument(
+ "--not_quant_pattern",
+ nargs='+',
+ type=str,
+ help=
+ "Layers which name_scope contains string in not_quant_pattern will not be quantized"
+ )
+ parser.add_argument(
+ 'opts',
+ help='See utils/config.py for all options',
+ default=None,
+ nargs=argparse.REMAINDER)
+ if len(sys.argv) == 1:
+ parser.print_help()
+ sys.exit(1)
+ return parser.parse_args()
+
+
+def export_inference_config():
+ deploy_cfg = '''DEPLOY:
+ USE_GPU : 1
+ MODEL_PATH : "%s"
+ MODEL_FILENAME : "%s"
+ PARAMS_FILENAME : "%s"
+ EVAL_CROP_SIZE : %s
+ MEAN : %s
+ STD : %s
+ IMAGE_TYPE : "%s"
+ NUM_CLASSES : %d
+ CHANNELS : %d
+ PRE_PROCESSOR : "SegPreProcessor"
+ PREDICTOR_MODE : "ANALYSIS"
+ BATCH_SIZE : 1
+ ''' % (cfg.FREEZE.SAVE_DIR, cfg.FREEZE.MODEL_FILENAME,
+ cfg.FREEZE.PARAMS_FILENAME, cfg.EVAL_CROP_SIZE, cfg.MEAN, cfg.STD,
+ cfg.DATASET.IMAGE_TYPE, cfg.DATASET.NUM_CLASSES, len(cfg.STD))
+ if not os.path.exists(cfg.FREEZE.SAVE_DIR):
+ os.mkdir(cfg.FREEZE.SAVE_DIR)
+ yaml_path = os.path.join(cfg.FREEZE.SAVE_DIR, 'deploy.yaml')
+ with open(yaml_path, "w") as fp:
+ fp.write(deploy_cfg)
+ return yaml_path
+
+
+def export_inference_model(args):
+ """
+ Export PaddlePaddle inference model for prediction deployment and serving.
+ """
+ print("Exporting inference model...")
+ startup_prog = fluid.Program()
+ infer_prog = fluid.Program()
+ image, logit_out = build_model(
+ infer_prog, startup_prog, phase=ModelPhase.PREDICT)
+
+ # Use CPU for exporting inference model instead of GPU
+ place = fluid.CPUPlace()
+ exe = fluid.Executor(place)
+ exe.run(startup_prog)
+ infer_prog = infer_prog.clone(for_test=True)
+ not_quant_pattern_list = []
+ if args.not_quant_pattern is not None:
+ not_quant_pattern_list = args.not_quant_pattern
+
+ config = {
+ 'weight_quantize_type': 'channel_wise_abs_max',
+ 'activation_quantize_type': 'moving_average_abs_max',
+ 'quantize_op_types': ['depthwise_conv2d', 'mul', 'conv2d'],
+ 'not_quant_pattern': not_quant_pattern_list
+ }
+
+ infer_prog = quant_aware(infer_prog, place, config, for_test=True)
+ if os.path.exists(cfg.TEST.TEST_MODEL):
+ fluid.io.load_persistables(
+ exe, cfg.TEST.TEST_MODEL, main_program=infer_prog)
+ else:
+ print("TEST.TEST_MODEL diretory is empty!")
+ exit(-1)
+
+ infer_prog = convert(infer_prog, place, config)
+
+ fluid.io.save_inference_model(
+ cfg.FREEZE.SAVE_DIR,
+ feeded_var_names=[image.name],
+ target_vars=[logit_out],
+ executor=exe,
+ main_program=infer_prog,
+ model_filename=cfg.FREEZE.MODEL_FILENAME,
+ params_filename=cfg.FREEZE.PARAMS_FILENAME)
+ print("Inference model exported!")
+ print("Exporting inference model config...")
+ deploy_cfg_path = export_inference_config()
+ print("Inference model saved : [%s]" % (deploy_cfg_path))
+
+
+def main():
+ args = parse_args()
+ if args.cfg_file is not None:
+ cfg.update_from_file(args.cfg_file)
+ if args.opts:
+ cfg.update_from_list(args.opts)
+ cfg.check_and_infer()
+ print(pprint.pformat(cfg))
+ export_inference_model(args)
+
+
+if __name__ == '__main__':
+ main()
diff --git a/slim/quantization/train_quant.py b/slim/quantization/train_quant.py
index 6a29dccdbaeda54b06c11299fb37e979cec6e401..1034b7234d73f21c41085d5a04d74069b04de7ca 100644
--- a/slim/quantization/train_quant.py
+++ b/slim/quantization/train_quant.py
@@ -157,9 +157,9 @@ def train_quant(cfg):
batch_size_per_dev = cfg.BATCH_SIZE // dev_count
print_info("batch_size_per_dev: {}".format(batch_size_per_dev))
- py_reader, avg_loss, lr, pred, grts, masks = build_model(
+ data_loader, avg_loss, lr, pred, grts, masks = build_model(
train_prog, startup_prog, phase=ModelPhase.TRAIN)
- py_reader.decorate_sample_generator(
+ data_loader.set_sample_generator(
data_generator, batch_size=batch_size_per_dev, drop_last=drop_last)
exe = fluid.Executor(place)
@@ -274,7 +274,7 @@ def train_quant(cfg):
print_info("Use multi-thread reader")
for epoch in range(begin_epoch, cfg.SOLVER.NUM_EPOCHS + 1):
- py_reader.start()
+ data_loader.start()
while True:
try:
if args.debug:
@@ -326,7 +326,7 @@ def train_quant(cfg):
timer.restart()
except fluid.core.EOFException:
- py_reader.reset()
+ data_loader.reset()
break
except Exception as e:
print(e)
diff --git a/turtorial/finetune_fast_scnn.md b/turtorial/finetune_fast_scnn.md
index 188a51edf9d138bb6832849c9ab2ad8afbcd3cd4..31541b796849277085104abf1df13284e264fae8 100644
--- a/turtorial/finetune_fast_scnn.md
+++ b/turtorial/finetune_fast_scnn.md
@@ -114,6 +114,6 @@ python pdseg/eval.py --use_gpu --cfg ./configs/fast_scnn_pet.yaml
| ICNet/bn |(1024, 2048) |8.76ms| 0.6831 |
| Fast-SCNN/bn | (1024, 2048) |6.28ms| 0.6964 |
-上述测试环境为v100. 测试使用paddle的推理接口[zero_copy](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_usage/deploy/inference/python_infer_cn.html#id8)的方式,模型输出是类别,即argmax后的值。
+The benchmark above was run on a V100. Tests use Paddle's [zero_copy](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/advanced_guide/inference_deployment/inference/python_infer_cn.html#id8) inference interface; the model output is the predicted class, i.e. the value after argmax.