Unverified commit b192374b, authored by qingqing01, committed by GitHub

Update hyperparameters for DeepLab (#1926) (#1928)

* Fix some hyperparameters
* Update README
Parent dc76c32f
deeplabv3plus_xception65_initialize.params
deeplabv3plus.params
deeplabv3plus.tar.gz
*.tgz
deeplabv3plus_gn_init*
deeplabv3plus_xception65_initialize*
*.log
*.sh
output*
@@ -72,20 +72,19 @@ python train.py --help
The commands above are only meant to verify that training runs correctly: they iterate just 50 steps with a batch size of 1. To reproduce the experiments of the original paper, use the following settings:
```
CUDA_VISIBLE_DEVICES=0 \
python ./train.py \
- --batch_size=8 \
+ --batch_size=4 \
--parallel=True \
--norm_type=gn \
--train_crop_size=769 \
- --total_step=90000 \
+ --total_step=500000 \
--base_lr=0.001 \
--init_weights_path=deeplabv3plus_gn_init \
--save_weights_path=output \
--dataset_path=$DATASET_PATH
```
- If you run out of GPU memory, try reducing `batch_size` while scaling up `total_step` proportionally so that their product stays unchanged. Thanks to the properties of Group Norm, changing `batch_size` does not noticeably affect the results and saves GPU memory; for example, you can set `--batch_size=4 --total_step=180000`.
- If you want to train with multiple GPUs, increase `batch_size` and decrease `total_step` proportionally; for example, single-GPU training with `--batch_size=4 --total_step=180000` becomes `--batch_size=16 --total_step=45000` on four GPUs.
+ If you run out of GPU memory, try reducing `batch_size` while proportionally scaling up `total_step` and scaling down `base_lr`, keeping the respective products unchanged. Thanks to the properties of Group Norm, changing `batch_size` does not noticeably affect the results and saves GPU memory; for example, you can set `--batch_size=2 --total_step=1000000 --base_lr=0.0005`.
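To make the arithmetic behind this rule explicit, here is a minimal sketch of the scaling, assuming the reference values from the command above (the helper name and `REF_*` constants are illustrative, not part of the repository): keep `batch_size * total_step` constant and scale `base_lr` linearly with `batch_size`.
```python
# Illustrative only: reference values taken from the command above.
REF_BATCH_SIZE = 4
REF_TOTAL_STEP = 500000
REF_BASE_LR = 0.001

def scaled_hyperparams(batch_size):
    """Return (total_step, base_lr) for a new batch size.

    total_step scales inversely with batch_size, so the total number of
    training samples seen stays constant; base_lr scales linearly with it.
    """
    factor = batch_size / float(REF_BATCH_SIZE)
    return int(REF_TOTAL_STEP / factor), REF_BASE_LR * factor

print(scaled_hyperparams(2))  # (1000000, 0.0005), the low-memory example above
```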
### Test
Run the following command to evaluate on the `Cityscape` test dataset:
@@ -110,7 +109,6 @@ step: 500, mIoU: 0.7881
|Dataset | norm type | pretrained model | trained model | mean IoU|
|---|---|---|---|---|
|CityScape | batch norm | [deeplabv3plus_xception65_initialize.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_xception65_initialize.tgz) | [deeplabv3plus.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus.tgz) | 0.7873 |
|CityScape | group norm | [deeplabv3plus_gn_init.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_gn_init.tgz) | [deeplabv3plus_gn.tgz](https://paddle-deeplab.bj.bcebos.com/deeplabv3plus_gn.tgz) | 0.7881 |
## References
@@ -137,7 +137,4 @@ for i in range(total_step):
    all_correct = right.copy()
    mp = (wrong + right) != 0
    miou2 = np.mean((right[mp] * 1.0 / (right[mp] + wrong[mp])))
-     if args.verbose:
-         print('step: %s, mIoU: %s' % (i + 1, miou2), flush=True)
-     else:
-         print('\rstep: %s, mIoU: %s' % (i + 1, miou2), end='\r', flush=True)
+     print('step: %s, mIoU: %s' % (i + 1, miou2))
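For readers unfamiliar with the `miou2` expression in this hunk: it averages per-class IoU over the classes that actually occur, assuming `right` holds each class's correctly predicted pixel count and `wrong` holds the rest of that class's union. A self-contained numpy sketch with made-up counts:
```python
import numpy as np

# Hypothetical per-class counts: right[c] = true positives of class c,
# wrong[c] = false positives + false negatives of class c.
right = np.array([90, 0, 45])
wrong = np.array([10, 0, 15])

mp = (wrong + right) != 0                      # skip classes absent from both
miou = np.mean(right[mp] * 1.0 / (right[mp] + wrong[mp]))
print('mIoU: %s' % miou)                       # (0.9 + 0.75) / 2 = 0.825
```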
@@ -9,7 +9,7 @@ import six
default_config = {
    "shuffle": True,
    "min_resize": 0.5,
-     "max_resize": 2,
+     "max_resize": 4,
    "crop_size": 769,
}
@@ -90,9 +90,21 @@ class CityscapeDataset:
                break
        if shape == -1:
            return img, label, ln
-         random_scale = np.random.rand(1) * (self.config['max_resize'] -
-                                             self.config['min_resize']
-                                             ) + self.config['min_resize']
+         if np.random.rand() > 0.5:
+             range_l = 1
+             range_r = self.config['max_resize']
+         else:
+             range_l = self.config['min_resize']
+             range_r = 1
+         if np.random.rand() > 0.5:
+             assert len(img.shape) == 3 and len(
+                 label.shape) == 3, "{} {}".format(img.shape, label.shape)
+             img = img[:, :, ::-1]
+             label = label[:, :, ::-1]
+         random_scale = np.random.rand(1) * (range_r - range_l) + range_l
        crop_size = int(shape / random_scale)
        bb = crop_size // 2
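The new augmentation logic draws the random scale in two stages: with equal probability it picks either the up-scaling range `[1, max_resize]` or the down-scaling range `[min_resize, 1]`, then samples uniformly inside the chosen range; independently, it mirrors image and label half of the time via `[:, :, ::-1]`. A standalone sketch of the same sampling (names are illustrative):
```python
import numpy as np

config = {"min_resize": 0.5, "max_resize": 4}

def sample_scale_and_flip(config):
    # Pick up-scaling [1, max_resize] or down-scaling [min_resize, 1]
    # with equal probability, then sample uniformly within that range.
    if np.random.rand() > 0.5:
        range_l, range_r = 1, config["max_resize"]
    else:
        range_l, range_r = config["min_resize"], 1
    random_scale = np.random.rand() * (range_r - range_l) + range_l
    flip = np.random.rand() > 0.5  # mirror along the last axis if True
    return random_scale, flip
```
Weighting the two sub-ranges equally keeps enlargement and shrinkage equally likely; a single uniform draw over `[0.5, 4]` would enlarge roughly 86% of the time.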
@@ -21,10 +21,10 @@ parser = argparse.ArgumentParser()
add_arg = lambda *args: utility.add_arguments(*args, argparser=parser)
# yapf: disable
- add_arg('batch_size', int, 2, "The number of images in each batch during training.")
+ add_arg('batch_size', int, 4, "The number of images in each batch during training.")
add_arg('train_crop_size', int, 769, "Image crop size during training.")
- add_arg('base_lr', float, 0.0001, "The base learning rate for model training.")
- add_arg('total_step', int, 90000, "Number of the training step.")
+ add_arg('base_lr', float, 0.001, "The base learning rate for model training.")
+ add_arg('total_step', int, 500000, "Number of training steps.")
add_arg('init_weights_path', str, None, "Path of the initial weights in paddlepaddle format.")
add_arg('save_weights_path', str, None, "Path of the saved weights during training.")
add_arg('dataset_path', str, None, "Cityscape dataset path.")
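The `add_arg` lambda shown in the context above forwards to `utility.add_arguments`. As a rough sketch of what such a helper usually does (this body is an assumption, not necessarily the repository's exact code):
```python
import argparse
import distutils.util

def add_arguments(argname, type, default, help, argparser):
    # Register --argname with a typed default; bools go through strtobool
    # so values like "False" on the command line are parsed correctly.
    type = distutils.util.strtobool if type == bool else type
    argparser.add_argument(
        "--" + argname, default=default, type=type,
        help=help + ' Default: %(default)s.')

parser = argparse.ArgumentParser()
add_arg = lambda *args: add_arguments(*args, argparser=parser)
add_arg('batch_size', int, 4, "The number of images in each batch during training.")
args = parser.parse_args([])
print(args.batch_size)  # 4
```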
@@ -39,7 +39,7 @@ add_arg('use_py_reader', bool, True, "Use py reader.")
parser.add_argument(
    '--enable_ce',
    action='store_true',
-     help='If set, run the task with continuous evaluation logs.')
+     help='If set, run the task with continuous evaluation logs. Users can ignore this argument.')
#yapf: enable
@contextlib.contextmanager
@@ -87,7 +87,8 @@ def loss(logit, label):
    label = fluid.layers.reshape(label, [-1, 1])
    label = fluid.layers.cast(label, 'int64')
    label_nignore = fluid.layers.reshape(label_nignore, [-1, 1])
-     loss = fluid.layers.softmax_with_cross_entropy(logit, label, ignore_index=255, numeric_stable_mode=True)
+     logit = fluid.layers.softmax(logit, use_cudnn=False)
+     loss = fluid.layers.cross_entropy(logit, label, ignore_index=255)
    label_nignore.stop_gradient = True
    label.stop_gradient = True
    return loss, label_nignore
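The replacement applies the softmax explicitly and then a cross-entropy that skips pixels labeled 255. A numpy sketch of the `ignore_index` semantics (illustrative; not Paddle's implementation):
```python
import numpy as np

def cross_entropy_ignore(probs, label, ignore_index=255):
    # probs: (N, C) rows from a softmax; label: (N, 1) int class ids.
    # Pixels whose label equals ignore_index contribute zero loss.
    label = label.reshape(-1)
    valid = label != ignore_index
    losses = np.zeros(label.shape, dtype=np.float64)
    losses[valid] = -np.log(probs[valid, label[valid]])
    return losses

probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.8, 0.1]])
label = np.array([[0], [255]])
print(cross_entropy_ignore(probs, label))  # [0.35667494 0.        ]
```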