# models.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid

import contextlib
# Current parameter-name prefix; scope() appends "<name>/" segments to it.
name_scope = ""

# Number of filters of the 1x1 conv applied to the decoder shortcut.
decode_channel = 48
# Number of output channels of the decoder's separable convolutions.
encode_channel = 256
# Number of output channels of the final 'logit' convolution.
label_number = 19

# Momentum passed to fluid.layers.batch_norm by bn().
bn_momentum = 0.99
# Keep rate handed to dropout() at the end of encoder().
dropout_keep_prop = 0.9
# When False, dropout() returns its input unchanged.
is_train = True

# Maps generated op names to the results recorded by append_op_result().
op_results = {}

# Epsilon used by bn(); deeplabv3p() reassigns it while building the graph.
default_epsilon = 1e-3
# Normalization used by bn(): 'bn' (batch norm) or 'gn' (group norm).
default_norm_type = 'bn'
# Group count requested from group_norm() when default_norm_type is 'gn'.
default_group_number = 32


@contextlib.contextmanager
def scope(name):
    """Temporarily extend the global ``name_scope`` prefix with ``name + '/'``.

    Intended to be used as ``with scope('block1'): ...``; scopes nest, so
    entering ``scope('a')`` and then ``scope('b')`` yields the prefix
    ``'a/b/'``.

    Args:
        name: Scope segment (a string) appended to the current prefix.

    Yields:
        None. The prefix in effect before entry is restored on exit.
    """
    global name_scope
    previous = name_scope
    name_scope = previous + name + '/'
    try:
        yield
    finally:
        # Restore even when the with-body raises; otherwise a failed graph
        # build would leave every later parameter name with a stale prefix.
        name_scope = previous


def check(data, number):
    """Normalize a per-layer setting to a sequence of ``number`` entries.

    Args:
        data: Either a single int, which is repeated ``number`` times, or a
            sized sequence that must already contain ``number`` entries.
        number: Required number of entries.

    Returns:
        A new list ``[data] * number`` when ``data`` is an int; otherwise
        ``data`` itself, unchanged.

    Raises:
        AssertionError: If ``data`` is a sequence whose length is not
            ``number``.
    """
    # isinstance (instead of an exact type comparison) also accepts int
    # subclasses, which previously fell through to len() and failed.
    if isinstance(data, int):
        return [data] * number
    assert len(data) == number, (
        "expected %d values, got %d" % (number, len(data)))
    return data


def clean():
    """Forget all recorded op results.

    Rebinds the module-level ``op_results`` to a fresh empty dict; a dict
    object obtained before the call is left untouched.
    """
    global op_results
    op_results = dict()


def append_op_result(result, name):
    """Record ``result`` in ``op_results`` and hand it back unchanged.

    The dictionary key is the current ``name_scope`` prefix, followed by
    ``name``, followed by the number of entries recorded so far.

    Args:
        result: Value to record.
        name: Short op label (a string) used to build the key.

    Returns:
        ``result`` itself, so the call can wrap an expression in place.
    """
    key = "".join((name_scope, name, str(len(op_results))))
    op_results[key] = result
    return result


def conv(*args, **kargs):
    """Create a ``fluid.layers.conv2d`` op named after the current scope.

    ``param_attr`` is always overwritten with ``name_scope + 'weights'``.
    ``bias_attr`` becomes ``name_scope + 'biases'`` when the caller passed a
    truthy ``bias_attr`` and ``False`` otherwise.

    Args:
        *args: Positional arguments forwarded to ``fluid.layers.conv2d``.
        **kargs: Keyword arguments forwarded to ``fluid.layers.conv2d``
            after the two attributes above have been set.

    Returns:
        The conv2d output, also recorded via ``append_op_result`` as 'conv'.
    """
    wants_bias = bool(kargs.get('bias_attr'))
    kargs['param_attr'] = name_scope + 'weights'
    kargs['bias_attr'] = name_scope + 'biases' if wants_bias else False
    output = fluid.layers.conv2d(*args, **kargs)
    return append_op_result(output, 'conv')


def group_norm(input, G, eps=1e-5, param_attr=None, bias_attr=None):
    """Group normalization built from basic fluid ops.

    The input is reshaped to ``[N, G, C // G * H * W]``, normalized along the
    last axis, scaled and shifted with one learned value per group, and
    reshaped back to ``input.shape``.

    Args:
        input: Variable whose ``shape`` unpacks into four values (N, C, H, W).
        G: Requested number of groups. If it does not divide C, the nearest
            positive divisor within +/-9 of it is used instead.
        eps: Constant added to the variance before the square root.
        param_attr: Attribute for the scale parameter (via LayerHelper).
        bias_attr: Attribute for the bias parameter (via LayerHelper).

    Returns:
        The normalized Variable with the same shape as ``input``.

    Raises:
        AssertionError: If no usable group count is found.
    """
    # Keep this as the first statement: locals() must contain exactly the
    # call arguments, which LayerHelper receives as keyword arguments.
    helper = fluid.layer_helper.LayerHelper('group_norm', **locals())

    batch, channels, height, width = input.shape
    if channels % G != 0:
        print("group can not divide channle:", channels, G)
        requested = G
        # Candidates in order: G, G, G+1, G-1, G+2, G-2, ..., G+9, G-9.
        nearby = [
            requested + shift for radius in range(10)
            for shift in (radius, -radius)
        ]
        for candidate in nearby:
            if candidate > 0 and channels % candidate == 0:
                G = candidate
                print("use group size:", G)
                break
    assert channels % G == 0

    grouped = fluid.layers.reshape(
        input, [batch, G, channels // G * height * width])
    group_mean = fluid.layers.reduce_mean(grouped, dim=2, keep_dim=True)
    centered = grouped - group_mean
    group_var = fluid.layers.reduce_mean(
        fluid.layers.square(centered), dim=2, keep_dim=True)
    normalized = centered / fluid.layers.sqrt(group_var + eps)

    # One scale and one bias value per group.
    per_group = (G, )
    scale = helper.create_parameter(
        attr=helper.param_attr,
        shape=per_group,
        dtype='float32',
        default_initializer=fluid.initializer.Constant(1.0))
    bias = helper.create_parameter(
        attr=helper.bias_attr, shape=per_group, dtype='float32', is_bias=True)

    scaled = fluid.layers.elementwise_mul(normalized, scale, axis=1)
    shifted = fluid.layers.elementwise_add(scaled, bias, axis=1)
    return fluid.layers.reshape(shifted, input.shape)


def bn(*args, **kargs):
    """Apply the normalization layer selected by ``default_norm_type``.

    * ``'bn'``: ``fluid.layers.batch_norm`` inside a 'BatchNorm' scope, using
      ``default_epsilon`` and ``bn_momentum``; all positional and keyword
      arguments are forwarded.
    * ``'gn'``: ``group_norm`` inside a 'GroupNorm' scope, using
      ``default_group_number`` and ``default_epsilon``; only ``args[0]`` is
      used, any other argument is ignored.

    Parameter names are derived from ``name_scope`` after the scope has been
    entered, so they carry the 'BatchNorm/' or 'GroupNorm/' suffix.

    Args:
        *args: The input Variable first, then optional positional arguments.
        **kargs: Extra keyword arguments for ``fluid.layers.batch_norm``.

    Returns:
        The normalized Variable, also recorded via ``append_op_result``.

    Raises:
        ValueError: If ``default_norm_type`` is neither 'bn' nor 'gn'.
    """
    if default_norm_type == 'bn':
        with scope('BatchNorm'):
            return append_op_result(
                fluid.layers.batch_norm(
                    *args,
                    epsilon=default_epsilon,
                    momentum=bn_momentum,
                    param_attr=name_scope + 'gamma',
                    bias_attr=name_scope + 'beta',
                    moving_mean_name=name_scope + 'moving_mean',
                    moving_variance_name=name_scope + 'moving_variance',
                    **kargs),
                'bn')
    elif default_norm_type == 'gn':
        with scope('GroupNorm'):
            return append_op_result(
                group_norm(
                    args[0],
                    default_group_number,
                    eps=default_epsilon,
                    param_attr=name_scope + 'gamma',
                    bias_attr=name_scope + 'beta'),
                'gn')
    else:
        # Raising a bare string is itself a TypeError and hides the message;
        # raise a real exception that carries the offending value.
        raise ValueError("Unsupport norm type:" + str(default_norm_type))


def bn_relu(data):
    """Normalize ``data`` with ``bn`` and then apply a recorded ReLU.

    Args:
        data: Input Variable.

    Returns:
        The activated Variable, recorded via ``append_op_result`` as 'relu'.
    """
    normalized = bn(data)
    activated = fluid.layers.relu(normalized)
    return append_op_result(activated, 'relu')


def relu(data):
    """Apply ``fluid.layers.relu`` and record the output as 'relu'.

    Args:
        data: Input Variable.

    Returns:
        The activated Variable.
    """
    activated = fluid.layers.relu(data)
    return append_op_result(activated, 'relu')


def seq_conv(input, channel, stride, filter, dilation=1, act=None):
    """Separable convolution: depthwise conv followed by a 1x1 pointwise conv.

    Each of the two convolutions is followed by ``bn`` and, when ``act`` is
    given, by ``act``.

    Args:
        input: Input Variable; ``input.shape[1]`` is used as the depthwise
            filter count and group count.
        channel: Number of output channels of the pointwise convolution.
        stride: Stride of the depthwise convolution.
        filter: Filter size of the depthwise convolution (an int; padding is
            ``(filter // 2) * dilation``).
        dilation: Dilation of the depthwise convolution.
        act: Optional callable applied after each normalization.

    Returns:
        The output Variable of the pointwise stage.
    """

    def _norm_then_act(tensor):
        # Shared tail of both stages: normalization, then optional activation.
        tensor = bn(tensor)
        return act(tensor) if act else tensor

    in_channels = input.shape[1]
    with scope('depthwise'):
        depthwise = conv(
            input,
            in_channels,
            filter,
            stride,
            groups=in_channels,
            padding=(filter // 2) * dilation,
            dilation=dilation)
        depthwise = _norm_then_act(depthwise)
    with scope('pointwise'):
        pointwise = conv(depthwise, channel, 1, 1, groups=1, padding=0)
        pointwise = _norm_then_act(pointwise)
    return pointwise


def xception_block(input,
                   channels,
                   strides=1,
                   filters=3,
                   dilation=1,
                   skip_conv=True,
                   has_skip=True,
                   activation_fn_in_separable_conv=False):
    """Build one Xception block made of three separable convolutions.

    Args:
        input: Input Variable.
        channels: Output channels per separable conv; an int or 3 values.
        strides: Stride per separable conv; an int or 3 values.
        filters: Filter size per separable conv; an int or 3 values.
        dilation: Dilation shared by all three separable convs.
        skip_conv: If True the shortcut is a 1x1 conv + ``bn`` of ``input``
            (using the last channel count and last stride); otherwise the
            shortcut is ``input`` itself.
        has_skip: If False no shortcut is added at all.
        activation_fn_in_separable_conv: If True, ReLU is applied inside each
            separable conv (after every normalization); if False, ReLU is
            applied once before each separable conv.

    Returns:
        A tuple ``(output, results)`` where ``results`` lists the output of
        each of the three separable convs, in order.
    """
    repeat_number = 3
    channels = check(channels, repeat_number)
    filters = check(filters, repeat_number)
    strides = check(strides, repeat_number)

    inner_act = relu if activation_fn_in_separable_conv else None
    results = []
    data = input
    for i in range(repeat_number):
        with scope('separable_conv' + str(i + 1)):
            if inner_act is None:
                # Pre-activation variant: ReLU in front of the separable conv.
                data = relu(data)
            data = seq_conv(
                data,
                channels[i],
                strides[i],
                filters[i],
                dilation=dilation,
                act=inner_act)
            results.append(data)

    if has_skip:
        if skip_conv:
            with scope('shortcut'):
                projected = conv(
                    input, channels[-1], 1, strides[-1], groups=1, padding=0)
                skip = bn(projected)
        else:
            skip = input
        data = data + skip
    return append_op_result(data, 'xception_block'), results


def entry_flow(data):
    """Build the entry flow: two 3x3 convs followed by three Xception blocks.

    Args:
        data: Input Variable.

    Returns:
        A tuple ``(output, shortcut)`` where ``shortcut`` is the output of
        the second separable conv of 'block2'.
    """
    stem_layers = (("conv1", 32, 2), ("conv2", 64, 1))
    with scope("entry_flow"):
        for layer_name, out_channels, layer_stride in stem_layers:
            with scope(layer_name):
                data = bn_relu(
                    conv(
                        data, out_channels, 3, stride=layer_stride, padding=1))
        with scope("block1"):
            data, _ = xception_block(data, 128, [1, 1, 2])
        with scope("block2"):
            data, block2_outputs = xception_block(data, 256, [1, 1, 2])
        with scope("block3"):
            data, _ = xception_block(data, 728, [1, 1, 2])
    return data, block2_outputs[1]


def middle_flow(data):
    """Build the middle flow: 16 Xception blocks ('block1' .. 'block16').

    Every block uses 728 channels, stride 1 and an identity shortcut.

    Args:
        data: Input Variable.

    Returns:
        The output Variable of the last block.
    """
    with scope("middle_flow"):
        for block_id in range(1, 17):
            with scope("block" + str(block_id)):
                data, _ = xception_block(data, 728, [1, 1, 1], skip_conv=False)
    return data


def exit_flow(data):
    """Build the exit flow: two Xception blocks.

    'block1' uses channels 728/1024/1024 with a conv shortcut; 'block2' uses
    channels 1536/1536/2048 with dilation 2, no shortcut, and ReLU inside
    the separable convolutions.

    Args:
        data: Input Variable.

    Returns:
        The output Variable of 'block2'.
    """
    with scope("exit_flow"):
        with scope('block1'):
            block1_out, _ = xception_block(data, [728, 1024, 1024], [1, 1, 1])
        with scope('block2'):
            block2_out, _ = xception_block(
                block1_out, [1536, 1536, 2048], [1, 1, 1],
                dilation=2,
                has_skip=False,
                activation_fn_in_separable_conv=True)
    return block2_out


def dropout(x, keep_rate):
    """Apply dropout while ``is_train`` is set; otherwise pass ``x`` through.

    Args:
        x: Input Variable.
        keep_rate: The drop probability handed to ``fluid.layers.dropout``
            is ``1 - keep_rate``; the result is then divided by
            ``keep_rate``.

    Returns:
        The rescaled dropout output when ``is_train`` is true, else ``x``.
    """
    if not is_train:
        return x
    dropped = fluid.layers.dropout(x, 1 - keep_rate)
    return dropped / keep_rate


def encoder(input):
    """Build the encoder head with five parallel branches.

    Branches, concatenated along axis 1 in this order: a global-average
    'image_pool' branch resized back to the input's spatial size, a 1x1 conv
    ('aspp0'), and three separable convs with dilation 6, 12 and 18
    ('aspp1' .. 'aspp3'). The concatenation goes through a 1x1 conv,
    ``bn_relu`` and ``dropout``.

    Args:
        input: Input Variable; ``input.shape[2:]`` is the resize target.

    Returns:
        The encoded Variable with 256 channels.
    """
    channel = 256
    with scope('encoder'):
        with scope("image_pool"):
            pooled = fluid.layers.reduce_mean(input, [2, 3], keep_dim=True)
            append_op_result(pooled, 'reduce_mean')
            pooled = conv(pooled, channel, 1, 1, groups=1, padding=0)
            pooled = bn_relu(pooled)
            pooled = fluid.layers.resize_bilinear(pooled, input.shape[2:])
        branches = [pooled]

        with scope("aspp0"):
            branches.append(
                bn_relu(conv(
                    input, channel, 1, 1, groups=1, padding=0)))

        # aspp1..aspp3 differ only in their dilation rate.
        for index, rate in enumerate((6, 12, 18), start=1):
            with scope("aspp" + str(index)):
                branches.append(
                    seq_conv(
                        input, channel, 1, 3, dilation=rate, act=relu))

        with scope("concat"):
            merged = fluid.layers.concat(branches, axis=1)
            merged = append_op_result(merged, 'concat')
            merged = bn_relu(conv(merged, channel, 1, 1, groups=1, padding=0))
            merged = dropout(merged, dropout_keep_prop)
    return merged


def decoder(encode_data, decode_shortcut):
    """Build the decoder that fuses encoder output with a backbone shortcut.

    The shortcut goes through a 1x1 conv with ``decode_channel`` filters and
    ``bn_relu``; the encoder output is resized to the shortcut's spatial
    size, the two are concatenated along axis 1 (encoder output first), and
    the result passes through two separable 3x3 convs with
    ``encode_channel`` outputs.

    Args:
        encode_data: Encoder output Variable.
        decode_shortcut: Shortcut Variable from the backbone.

    Returns:
        The decoded Variable.
    """
    with scope('decoder'):
        with scope('concat'):
            shortcut = conv(
                decode_shortcut, decode_channel, 1, 1, groups=1, padding=0)
            shortcut = bn_relu(shortcut)
            upsampled = fluid.layers.resize_bilinear(encode_data,
                                                     shortcut.shape[2:])
            fused = fluid.layers.concat([upsampled, shortcut], axis=1)
            append_op_result(fused, 'concat')
        for stage_name in ("separable_conv1", "separable_conv2"):
            with scope(stage_name):
                fused = seq_conv(
                    fused, encode_channel, 1, 3, dilation=1, act=relu)
    return fused


def deeplabv3p(img):
    """Build the full network: backbone, encoder, decoder and logit layer.

    The backbone (entry, middle and exit flow) is built under the
    'xception_65' scope with ``default_epsilon`` set to 1e-3; the encoder,
    decoder and logit layer are built with ``default_epsilon`` set to 1e-5.
    The global ``default_epsilon`` is left at 1e-5 afterwards.

    Args:
        img: Input image Variable; ``img.shape[2:]`` is the output size.

    Returns:
        The logit Variable with ``label_number`` channels, resized to the
        spatial size of ``img``.
    """
    global default_epsilon
    append_op_result(img, 'img')

    with scope('xception_65'):
        default_epsilon = 1e-3
        features, decode_shortcut = entry_flow(img)
        features = middle_flow(features)
        features = exit_flow(features)

    default_epsilon = 1e-5
    encoded = encoder(features)
    decoded = decoder(encoded, decode_shortcut)

    with scope('logit'):
        logit = conv(
            decoded, label_number, 1, stride=1, padding=0, bias_attr=True)
        logit = fluid.layers.resize_bilinear(logit, img.shape[2:])
    return logit