#!/usr/bin/env python
from paddle.trainer_config_helpers import *

# Input image geometry and number of target classes.
height = 224
width = 224
num_class = 1000

# Run-time options read via get_config_arg(name, type, default).
batch_size = get_config_arg('batch_size', int, 64)
layer_num = get_config_arg('layer_num', int, 19)
is_infer = get_config_arg("is_infer", bool, False)

# Keyword arguments forwarded to the data provider through
# define_py_data_sources2(..., args=args).
args = {
    'height': height,
    'width': width,
    'color': True,
    'num_class': num_class,
    'is_infer': is_infer
}
# Only one of the two file lists is supplied: "train.list" when training,
# "test.list" when running inference; the other slot is None.
define_py_data_sources2(
    "train.list" if not is_infer else None,
    "test.list" if is_infer else None,
    module="provider",
    obj="process",
    args=args)

# Optimizer configuration. The learning rate is divided by batch_size and
# the L2 coefficient is multiplied by it, so both scale with the batch size.
settings(
    batch_size=batch_size,
    learning_rate=0.001 / batch_size,
    learning_method=MomentumOptimizer(0.9),
    regularization=L2Regularization(0.0005 * batch_size))

# Network input named 'image'; its size is height * width * 3, matching the
# num_channels=3 declared by the first conv group in vgg_network.
img = data_layer(name='image', size=height * width * 3)


def vgg_network(vgg_num=3):
    """Assemble the VGG classifier on top of the module-level ``img`` layer.

    The network is five convolution groups followed by three fully
    connected layers. Every group uses 3x3 filters with padding 1 and ReLU,
    then a 2x2 max pool with stride 2. The first two groups always hold two
    conv layers (64 and 128 filters); the last three hold ``vgg_num`` conv
    layers each (256, 512 and 512 filters).

    Args:
        vgg_num: Number of conv layers in each of the last three groups.

    Returns:
        The final ``fc_layer`` of size ``num_class`` with softmax activation.
    """

    def conv_group(source, filters, **extra):
        # All groups share the same kernel/pooling setup; only the input,
        # the filter list and (for the first group) num_channels vary.
        return img_conv_group(
            input=source,
            conv_num_filter=filters,
            conv_padding=1,
            conv_filter_size=3,
            conv_act=ReluActivation(),
            pool_size=2,
            pool_stride=2,
            pool_type=MaxPooling(),
            **extra)

    net = conv_group(img, [64, 64], num_channels=3)
    net = conv_group(net, [128, 128])

    # Depth-dependent groups: vgg_num conv layers of the given width each.
    for width in (256, 512, 512):
        net = conv_group(net, [width] * vgg_num)

    # Two identical 4096-unit hidden layers with dropout 0.5.
    for _ in range(2):
        net = fc_layer(
            input=net,
            size=4096,
            act=ReluActivation(),
            layer_attr=ExtraAttr(drop_rate=0.5))

    return fc_layer(input=net, size=num_class, act=SoftmaxActivation())


# Pick the number of conv layers per late group from the requested depth:
# 16 -> 3, 19 -> 4.
if layer_num == 16:
    vgg = vgg_network(3)
elif layer_num == 19:
    vgg = vgg_network(4)
else:
    # Fail here with a clear message; merely printing would leave `vgg`
    # undefined and surface later as a NameError where it is first used.
    raise ValueError(
        "Wrong layer number: %r (supported values are 16 and 19)." %
        (layer_num, ))

# Inference exposes the softmax layer itself; training exposes the
# cross-entropy cost computed against the 'label' data layer.
if is_infer:
    outputs(vgg)
else:
    lab = data_layer('label', num_class)
    loss = cross_entropy(input=vgg, label=lab)
    outputs(loss)