From dba781af3fe931f6cf7a96d656a0358ac09f8fc0 Mon Sep 17 00:00:00 2001
From: wuyefeilin <30919197+wuyefeilin@users.noreply.github.com>
Date: Tue, 26 Nov 2019 19:23:44 +0800
Subject: [PATCH] add preprocess and argmax to export process (#98)

* add preprocess and argmax to export process

* add comments
---
 pdseg/models/model_builder.py | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

diff --git a/pdseg/models/model_builder.py b/pdseg/models/model_builder.py
index 2b26c662..12805164 100644
--- a/pdseg/models/model_builder.py
+++ b/pdseg/models/model_builder.py
@@ -140,8 +140,25 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
 
     with fluid.program_guard(main_prog, start_prog):
         with fluid.unique_name.guard():
-            image = fluid.layers.data(
-                name='image', shape=image_shape, dtype='float32')
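+            # When exporting the model, build image normalization into the graph to shorten the image-processing pipeline needed at prediction/deployment time.
+            # At deployment the caller only has to add the batch_size dimension to the input image.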
+            if ModelPhase.is_predict(phase):
+                origin_image = fluid.layers.data(name='image', 
+                        shape=[ -1, 1, 1, cfg.DATASET.DATA_DIM], 
+                        dtype='float32', 
+                        append_batch_size=False)
+                image = fluid.layers.transpose(origin_image, [0, 3, 1, 2])
+                origin_shape = fluid.layers.shape(image)[-2:]
+                mean = np.array(cfg.MEAN).reshape(1, len(cfg.MEAN), 1, 1)
+                mean = fluid.layers.assign(mean.astype('float32'))
+                std = np.array(cfg.STD).reshape(1, len(cfg.STD), 1, 1)
+                std = fluid.layers.assign(std.astype('float32'))
+                image = (image/255 - mean)/std
+                image = fluid.layers.resize_bilinear(image, 
+                        out_shape=[height, width], align_corners=False, align_mode=0)
+            else:
+                image = fluid.layers.data(
+                    name='image', shape=image_shape, dtype='float32')
             label = fluid.layers.data(
                 name='label', shape=grt_shape, dtype='int32')
             mask = fluid.layers.data(
@@ -162,9 +179,11 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
             if not isinstance(loss_type, list):
                 loss_type = list(loss_type)
 
+            # dice_loss and bce_loss are only applicable to two-class (binary) segmentation
             if class_num > 2 and (("dice_loss" in loss_type) or ("bce_loss" in loss_type)):
                 raise Exception("dice loss and bce loss is only applicable to binary classfication")
             
+            # For two-class segmentation, when dice_loss or bce_loss is selected as the loss, the final logit output is set to a single channel
             if ("dice_loss" in loss_type) or ("bce_loss" in loss_type):
                 class_num = 1
                 if "softmax_loss" in loss_type:
@@ -172,6 +191,7 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
             
             logits = model_func(image, class_num)
 
+            # Compute the loss according to the selected loss function(s)
             if ModelPhase.is_train(phase) or ModelPhase.is_eval(phase):
                 loss_valid = False
                 avg_loss_list = []
@@ -213,11 +233,15 @@ def build_model(main_prog, start_prog, phase=ModelPhase.TRAIN):
 
             # return image input and logit output for inference graph prune
             if ModelPhase.is_predict(phase):
+                # In two-class segmentation with dice_loss or bce_loss the logit is single-channel; convert it to two channels
                 if class_num == 1:
                     logit = sigmoid_to_softmax(logit)
                 else:
                     logit = softmax(logit)
-                return image, logit
+                logit = fluid.layers.resize_bilinear(logit, out_shape=origin_shape, align_corners=False, align_mode=0)
+                logit = fluid.layers.transpose(logit, [0, 2, 3, 1])
+                logit = fluid.layers.argmax(logit, axis=3)
+                return origin_image, logit
 
             if class_num == 1:
                 out = sigmoid_to_softmax(logit)
-- 
GitLab