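"""Train SE-ResNeXt (50 or 152 layers) for image classification with PaddlePaddle Fluid."""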
import os
import numpy as np
import time
import sys
import paddle.v2 as paddle
import paddle.fluid as fluid
import reader


def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1,
                  act=None):
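    """Conv2D (bias-free) followed by batch norm; `act` is applied after the norm.

    Padding of (filter_size - 1) // 2 preserves the spatial size at stride 1.
    """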
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        act=None,
        bias_attr=False)
    return fluid.layers.batch_norm(input=conv, act=act)


def squeeze_excitation(input, num_channels, reduction_ratio):
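    """Squeeze-and-Excitation block: global average pooling, an FC bottleneck
    (channels reduced by `reduction_ratio`, ReLU), an FC expansion back to
    `num_channels` with sigmoid, then channel-wise rescaling of the input.
    """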
    pool = fluid.layers.pool2d(
        input=input, pool_size=0, pool_type='avg', global_pooling=True)
    squeeze = fluid.layers.fc(input=pool,
                              size=num_channels // reduction_ratio,
                              act='relu')
    excitation = fluid.layers.fc(input=squeeze,
                                 size=num_channels,
                                 act='sigmoid')
    scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0)
    return scale


def shortcut(input, ch_out, stride):
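    """Return the input unchanged when its channel count already equals
    `ch_out`; otherwise project it with conv_bn_layer (1x1 kernel for
    stride 1, 3x3 for larger strides).
    """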
    ch_in = input.shape[1]
    if ch_in != ch_out:
        if stride == 1:
            filter_size = 1
        else:
            filter_size = 3
        return conv_bn_layer(input, ch_out, filter_size, stride)
    else:
        return input


def bottleneck_block(input, num_filters, stride, cardinality, reduction_ratio):
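    """SE-ResNeXt bottleneck: 1x1 reduce, grouped 3x3 conv with `cardinality`
    groups, 1x1 expand to num_filters * 2, SE rescaling, then a residual add
    followed by ReLU.
    """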
    conv0 = conv_bn_layer(
        input=input, num_filters=num_filters, filter_size=1, act='relu')
    conv1 = conv_bn_layer(
        input=conv0,
        num_filters=num_filters,
        filter_size=3,
        stride=stride,
        groups=cardinality,
        act='relu')
    conv2 = conv_bn_layer(
        input=conv1, num_filters=num_filters * 2, filter_size=1, act=None)
    scale = squeeze_excitation(
        input=conv2,
        num_channels=num_filters * 2,
        reduction_ratio=reduction_ratio)

    short = shortcut(input, num_filters * 2, stride)

    return fluid.layers.elementwise_add(x=short, y=scale, act='relu')


def SE_ResNeXt(input, class_dim, infer=False, layers=50):
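    """Build the SE-ResNeXt-50 or SE-ResNeXt-152 classifier. The two variants
    differ in the stem (one 7x7 conv vs. three 3x3 convs), cardinality, and
    stage depths; dropout before the final FC is skipped when `infer` is True.
    """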
    supported_layers = [50, 152]
    if layers not in supported_layers:
        raise ValueError("supported layers are {} but given layers is {}".
                         format(supported_layers, layers))
    if layers == 50:
        cardinality = 32
        reduction_ratio = 16
        depth = [3, 4, 6, 3]
        num_filters = [128, 256, 512, 1024]

        conv = conv_bn_layer(
            input=input, num_filters=64, filter_size=7, stride=2, act='relu')
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')
    elif layers == 152:
        cardinality = 64
        reduction_ratio = 16
        depth = [3, 8, 36, 3]
        num_filters = [128, 256, 512, 1024]

        conv = conv_bn_layer(
            input=input, num_filters=64, filter_size=3, stride=2, act='relu')
        conv = conv_bn_layer(
            input=conv, num_filters=64, filter_size=3, stride=1, act='relu')
        conv = conv_bn_layer(
            input=conv, num_filters=128, filter_size=3, stride=1, act='relu')
        conv = fluid.layers.pool2d(
            input=conv,
            pool_size=3,
            pool_stride=2,
            pool_padding=1,
            pool_type='max')

    for block in range(len(depth)):
        for i in range(depth[block]):
            conv = bottleneck_block(
                input=conv,
                num_filters=num_filters[block],
                stride=2 if i == 0 and block != 0 else 1,
                cardinality=cardinality,
                reduction_ratio=reduction_ratio)

    pool = fluid.layers.pool2d(
        input=conv, pool_size=0, pool_type='avg', global_pooling=True)
    if not infer:
        drop = fluid.layers.dropout(x=pool, dropout_prob=0.2)
    else:
        drop = pool
    out = fluid.layers.fc(input=drop, size=class_dim, act='softmax')
    return out


def train(learning_rate,
          batch_size,
          num_passes,
          init_model=None,
          model_save_dir='model',
          parallel=True,
          use_nccl=True,
          lr_strategy=None,
          layers=50):
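    """Train SE-ResNeXt on 224x224 RGB images with 1000 classes. When
    `parallel` is True the model is replicated across devices with ParallelDo;
    persistable variables are saved under `model_save_dir` after every pass.
    """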
    class_dim = 1000
    image_shape = [3, 224, 224]

    image = fluid.layers.data(name='image', shape=image_shape, dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    if parallel:
        places = fluid.layers.get_places()
        pd = fluid.layers.ParallelDo(places, use_nccl=use_nccl)

        with pd.do():
            image_ = pd.read_input(image)
            label_ = pd.read_input(label)
            out = SE_ResNeXt(input=image_, class_dim=class_dim, layers=layers)
            cost = fluid.layers.cross_entropy(input=out, label=label_)
            avg_cost = fluid.layers.mean(x=cost)
            acc_top1 = fluid.layers.accuracy(input=out, label=label_, k=1)
            acc_top5 = fluid.layers.accuracy(input=out, label=label_, k=5)
            pd.write_output(avg_cost)
            pd.write_output(acc_top1)
            pd.write_output(acc_top5)

        avg_cost, acc_top1, acc_top5 = pd()
        avg_cost = fluid.layers.mean(x=avg_cost)
        acc_top1 = fluid.layers.mean(x=acc_top1)
        acc_top5 = fluid.layers.mean(x=acc_top5)
    else:
        out = SE_ResNeXt(input=image, class_dim=class_dim, layers=layers)
        cost = fluid.layers.cross_entropy(input=out, label=label)
        avg_cost = fluid.layers.mean(x=cost)
        acc_top1 = fluid.layers.accuracy(input=out, label=label, k=1)
        acc_top5 = fluid.layers.accuracy(input=out, label=label, k=5)

    if lr_strategy is None:
        optimizer = fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))
    else:
        bd = lr_strategy["bd"]
        lr = lr_strategy["lr"]
        optimizer = fluid.optimizer.Momentum(
            learning_rate=fluid.layers.piecewise_decay(
                boundaries=bd, values=lr),
            momentum=0.9,
            regularization=fluid.regularizer.L2Decay(1e-4))

    opts = optimizer.minimize(avg_cost)
    fluid.memory_optimize(fluid.default_main_program())

    inference_program = fluid.default_main_program().clone()
    with fluid.program_guard(inference_program):
        inference_program = fluid.io.get_inference_program(
            [avg_cost, acc_top1, acc_top5])

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(fluid.default_startup_program())

    if init_model is not None:
        fluid.io.load_persistables(exe, init_model)

    train_reader = paddle.batch(reader.train(), batch_size=batch_size)
    test_reader = paddle.batch(reader.test(), batch_size=batch_size)
    feeder = fluid.DataFeeder(place=place, feed_list=[image, label])

    for pass_id in range(num_passes):
        train_info = [[], [], []]
        test_info = [[], [], []]
        for batch_id, data in enumerate(train_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                fluid.default_main_program(),
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            train_info[0].append(loss[0])
            train_info[1].append(acc1[0])
            train_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print(
                    "Pass {0}, trainbatch {1}, loss {2}, acc1 {3}, acc5 {4} time {5}".
                    format(pass_id, batch_id, loss[0], acc1[0], acc5[0],
                           "%2.2f sec" % period))
                sys.stdout.flush()

        train_loss = np.array(train_info[0]).mean()
        train_acc1 = np.array(train_info[1]).mean()
        train_acc5 = np.array(train_info[2]).mean()
        for batch_id, data in enumerate(test_reader()):
            t1 = time.time()
            loss, acc1, acc5 = exe.run(
                inference_program,
                feed=feeder.feed(data),
                fetch_list=[avg_cost, acc_top1, acc_top5])
            t2 = time.time()
            period = t2 - t1
            test_info[0].append(loss[0])
            test_info[1].append(acc1[0])
            test_info[2].append(acc5[0])
            if batch_id % 10 == 0:
                print(
                    "Pass {0}, testbatch {1}, loss {2}, acc1 {3}, acc5 {4} time {5}".
                    format(pass_id, batch_id, loss[0], acc1[0], acc5[0],
                           "%2.2f sec" % period))
                sys.stdout.flush()

        test_loss = np.array(test_info[0]).mean()
        test_acc1 = np.array(test_info[1]).mean()
        test_acc5 = np.array(test_info[2]).mean()

        print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3},\
              test_loss {4}, test_acc1 {5}, test_acc5 {6}"
                                                          .format(pass_id,  \
              train_loss, train_acc1, train_acc5, test_loss, test_acc1, test_acc5))
        sys.stdout.flush()

        model_path = os.path.join(model_save_dir, str(pass_id))
        if not os.path.isdir(model_path):
            os.makedirs(model_path)
        fluid.io.save_persistables(exe, model_path)


if __name__ == '__main__':
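    # Piecewise learning-rate decay at epochs 30, 60, and 90 for an
    # ImageNet-sized training set (total_images = 1281167).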
    epoch_points = [30, 60, 90]
    total_images = 1281167
    batch_size = 256
    step = int(total_images / batch_size + 1)
    bd = [e * step for e in epoch_points]
    lr = [0.1, 0.01, 0.001, 0.0001]

    lr_strategy = {"bd": bd, "lr": lr}

    use_nccl = True
    # layers: 50, 152
    layers = 50

    train(
        learning_rate=0.1,
        batch_size=batch_size,
        num_passes=120,
        init_model=None,
        parallel=True,
        use_nccl=True,
        lr_strategy=lr_strategy,
        layers=layers)