Commit 432d3c73 authored by 别团等shy哥发育

MnasNet neural architecture search

Parent e63ec6d3
from tensorflow.keras import optimizers, layers, models, callbacks, utils, preprocessing, regularizers
from tensorflow.keras import backend as K
import tensorflow as tf
import numpy as np
def MnasNet(n_classes=1000, input_shape=(224, 224, 3), alpha=1):
    inputs = layers.Input(shape=input_shape)

    x = conv_bn(inputs, 32 * alpha, 3, strides=2)
    x = sepConv_bn_noskip(x, 16 * alpha, 3, strides=1)
    # MBConv3 3x3
    x = MBConv_idskip(x, filters=24, kernel_size=3, strides=2, filters_multiplier=3, alpha=alpha)
    x = MBConv_idskip(x, filters=24, kernel_size=3, strides=1, filters_multiplier=3, alpha=alpha)
    x = MBConv_idskip(x, filters=24, kernel_size=3, strides=1, filters_multiplier=3, alpha=alpha)
    # MBConv3 5x5
    x = MBConv_idskip(x, filters=40, kernel_size=5, strides=2, filters_multiplier=3, alpha=alpha)
    x = MBConv_idskip(x, filters=40, kernel_size=5, strides=1, filters_multiplier=3, alpha=alpha)
    x = MBConv_idskip(x, filters=40, kernel_size=5, strides=1, filters_multiplier=3, alpha=alpha)
    # MBConv6 5x5
    x = MBConv_idskip(x, filters=80, kernel_size=5, strides=2, filters_multiplier=6, alpha=alpha)
    x = MBConv_idskip(x, filters=80, kernel_size=5, strides=1, filters_multiplier=6, alpha=alpha)
    x = MBConv_idskip(x, filters=80, kernel_size=5, strides=1, filters_multiplier=6, alpha=alpha)
    # MBConv6 3x3
    x = MBConv_idskip(x, filters=96, kernel_size=3, strides=1, filters_multiplier=6, alpha=alpha)
    x = MBConv_idskip(x, filters=96, kernel_size=3, strides=1, filters_multiplier=6, alpha=alpha)
    # MBConv6 5x5
    x = MBConv_idskip(x, filters=192, kernel_size=5, strides=2, filters_multiplier=6, alpha=alpha)
    x = MBConv_idskip(x, filters=192, kernel_size=5, strides=1, filters_multiplier=6, alpha=alpha)
    x = MBConv_idskip(x, filters=192, kernel_size=5, strides=1, filters_multiplier=6, alpha=alpha)
    x = MBConv_idskip(x, filters=192, kernel_size=5, strides=1, filters_multiplier=6, alpha=alpha)
    # MBConv6 3x3
    x = MBConv_idskip(x, filters=320, kernel_size=3, strides=1, filters_multiplier=6, alpha=alpha)
    # Final 1x1 conv + global pooling + classifier
    x = conv_bn(x, filters=1152 * alpha, kernel_size=1, strides=1)
    x = layers.GlobalAveragePooling2D()(x)
    predictions = layers.Dense(n_classes, activation='softmax')(x)

    return models.Model(inputs=inputs, outputs=predictions)
# Convolution with batch normalization
def conv_bn(x, filters, kernel_size, strides=1, alpha=1, activation=True):
    """Convolution block.
    Defines a 2D convolution followed by batch normalization and ReLU6.
    # Arguments
        x: Tensor, input tensor of the conv layer.
        filters: Integer, the dimensionality of the output space.
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
        strides: An integer or tuple/list of 2 integers, specifying the strides
            of the convolution along the width and height. Can be a single
            integer to specify the same value for all spatial dimensions.
        alpha: A width multiplier applied to the number of filters.
        activation: A boolean indicating whether to apply ReLU6 after the normalization.
    # Returns
        Output tensor.
    """
    filters = _make_divisible(filters * alpha)
    x = layers.Conv2D(filters=filters, kernel_size=kernel_size, strides=strides, padding='same',
                      use_bias=False, kernel_regularizer=regularizers.l2(l=0.0003))(x)
    x = layers.BatchNormalization(epsilon=1e-3, momentum=0.999)(x)
    if activation:
        x = layers.ReLU(max_value=6)(x)
    return x
# Depthwise convolution with batch normalization
def depthwiseConv_bn(x, depth_multiplier, kernel_size, strides=1):
    """Depthwise convolution block.
    DepthwiseConv2D performs only the first step of a depthwise separable
    convolution: the spatial convolution that acts on each input channel
    separately, without the pointwise step.
    This function defines that depthwise convolution followed by BN and ReLU6.
    # Arguments
        x: Tensor, input tensor of the conv layer.
        depth_multiplier: Integer, number of depthwise convolution output channels
            per input channel.
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
        strides: An integer or tuple/list of 2 integers, specifying the strides
            of the convolution along the width and height. Can be a single
            integer to specify the same value for all spatial dimensions.
    # Returns
        Output tensor.
    """
    x = layers.DepthwiseConv2D(kernel_size=kernel_size, strides=strides, depth_multiplier=depth_multiplier,
                               padding='same', use_bias=False,
                               depthwise_regularizer=regularizers.l2(l=0.0003))(x)
    x = layers.BatchNormalization(epsilon=1e-3, momentum=0.999)(x)
    x = layers.ReLU(max_value=6)(x)
    return x
def sepConv_bn_noskip(x, filters, kernel_size, strides=1):
    """Separable convolution block (block F of the MnasNet paper, https://arxiv.org/pdf/1807.11626.pdf).
    # Arguments
        x: Tensor, input tensor of the conv layer.
        filters: Integer, the dimensionality of the output space.
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
        strides: An integer or tuple/list of 2 integers, specifying the strides
            of the convolution along the width and height. Can be a single
            integer to specify the same value for all spatial dimensions.
    # Returns
        Output tensor.
    """
    x = depthwiseConv_bn(x, depth_multiplier=1, kernel_size=kernel_size, strides=strides)
    x = conv_bn(x, filters=filters, kernel_size=1, strides=1)
    return x
# Inverted bottleneck block with identity skip connection
def MBConv_idskip(x_input, filters, kernel_size, strides=1, filters_multiplier=1, alpha=1):
    """Mobile inverted bottleneck convolution (blocks b, c, d, e of the MnasNet paper, https://arxiv.org/pdf/1807.11626.pdf).
    # Arguments
        x_input: Tensor, input tensor of the block.
        filters: Integer, the dimensionality of the output space.
        kernel_size: An integer or tuple/list of 2 integers, specifying the
            width and height of the 2D convolution window.
        strides: An integer or tuple/list of 2 integers, specifying the strides
            of the convolution along the width and height. Can be a single
            integer to specify the same value for all spatial dimensions.
        filters_multiplier: Integer, expansion factor of the bottleneck.
        alpha: A width multiplier applied to the number of filters.
    # Returns
        Output tensor.
    """
    depthwise_conv_filters = _make_divisible(x_input.shape[3])
    pointwise_conv_filters = _make_divisible(filters * alpha)

    x = conv_bn(x_input, filters=depthwise_conv_filters * filters_multiplier, kernel_size=1, strides=1)
    x = depthwiseConv_bn(x, depth_multiplier=1, kernel_size=kernel_size, strides=strides)
    x = conv_bn(x, filters=pointwise_conv_filters, kernel_size=1, strides=1, activation=False)

    # Residual connection if possible
    if strides == 1 and x.shape[3] == x_input.shape[3]:
        return layers.add([x_input, x])
    else:
        return x
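# Worked shape flow for one of the calls above (a sketch, assuming a 56x56x24 input and
# MBConv_idskip(x, filters=40, kernel_size=5, strides=2, filters_multiplier=3, alpha=1)):
#   1x1 conv_bn (expand)          : 56x56x24 -> 56x56x72   (24 * filters_multiplier channels)
#   depthwiseConv_bn              : 56x56x72 -> 28x28x72   (strides=2 halves the spatial size)
#   1x1 conv_bn (project, linear) : 28x28x72 -> 28x28x40
#   strides != 1 here, so no identity skip is added for this block.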
# This function is taken from the original tf repo.
# It ensures that all layers have a channel number that is divisible by 8.
# It can be seen here:
# https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
def _make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # Make sure that rounding down does not reduce the value by more than 10%.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
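# A few worked values of the rounding rule above (divisor=8), as a sanity check:
#   _make_divisible(30) -> 32   (30 rounds to the nearest multiple of 8, which is 32)
#   _make_divisible(17) -> 16   (16 is still >= 0.9 * 17, so no upward correction is applied)
#   _make_divisible(24) -> 24   (already a multiple of 8)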
if __name__ == "__main__":
    model = MnasNet()
    model.compile(optimizer='adam')
    model.summary()
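    # A minimal smoke test on random data (an illustrative sketch only: the loss,
    # label count and batch size below are assumptions, not part of this file):
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    dummy_x = np.random.rand(8, 224, 224, 3).astype('float32')
    dummy_y = utils.to_categorical(np.random.randint(0, 1000, size=(8,)), num_classes=1000)
    model.fit(dummy_x, dummy_y, batch_size=4, epochs=1)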
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, Model
from plot_model import plot_model
# (1) Standard convolution block
def conv_block(input_tensor, filters, kernel_size, stride):
    # Standard convolution + BN + activation
    x = layers.Conv2D(filters=filters,          # number of convolution kernels
                      kernel_size=kernel_size,  # kernel size
                      strides=stride,           # stride
                      use_bias=False,           # no bias needed when a BN layer follows
                      padding='same')(input_tensor)  # stride=1 keeps the feature map size, stride=2 halves it
    x = layers.BatchNormalization()(x)  # batch normalization
    x = layers.ReLU()(x)                # ReLU activation
    return x
# (2) Depthwise convolution
def depthwise_conv_block(input_tensor, kernel_size, stride):
    # Depthwise convolution only processes spatial (height/width) information;
    # the number of input and output channels stays the same
    x = layers.DepthwiseConv2D(kernel_size=kernel_size,  # kernel size
                               strides=stride,           # stride
                               use_bias=False,           # no bias needed when a BN layer follows
                               padding='same')(input_tensor)  # stride=1 keeps the feature map size
    x = layers.BatchNormalization()(x)  # batch normalization
    x = layers.ReLU()(x)                # ReLU activation
    return x  # feature map with the same number of channels as the input
# (3) Pointwise convolution
def pointwise_conv_block(input_tensor, filters):
    # 1x1 convolution only fuses information across channels; each kernel produces one feature map
    x = layers.Conv2D(filters=filters,      # number of kernels, i.e. number of output feature maps
                      kernel_size=(1, 1),   # 1x1 kernel, no spatial processing
                      strides=1,            # stride=1 keeps the feature map size
                      padding='same',
                      use_bias=False)(input_tensor)  # no bias needed when a BN layer follows
    x = layers.BatchNormalization()(x)  # batch normalization
    # No ReLU here: the projection uses a linear activation
    return x
# (4) Depthwise separable convolution == depthwise convolution + pointwise convolution
def sep_conv_block(input_tensor, kernel_size, stride, filters):
    # Depthwise convolution handles spatial information, ignoring cross-channel information
    x = depthwise_conv_block(input_tensor, kernel_size, stride)
    # Pointwise convolution mixes information across channels
    x = pointwise_conv_block(x, filters)
    return x  # output feature map of the depthwise separable convolution
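# A rough sense of why this factorization is cheaper (a worked count, not from the original
# comments): for a 3x3 kernel, 32 input channels and 64 output channels, a standard convolution
# has 3*3*32*64 = 18432 weights, while the depthwise step (3*3*32 = 288) plus the pointwise
# step (1*1*32*64 = 2048) used above needs only 2336.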
# (5) Inverted residual block built from a depthwise separable convolution:
# a 1x1 standard convolution expands the channels N times, then a depthwise convolution,
# then a 1x1 pointwise convolution projects the channels back down
def inverted_res_block(input_tensor, expansion, kernel_size, stride, out_channel):
    # keras.backend.int_shape returns the static shape; only the last (channel) dimension is needed
    in_channel = keras.backend.int_shape(input_tensor)[-1]
    # Expansion: 1x1 standard convolution raises the channel count
    x = conv_block(input_tensor,                    # input feature map
                   kernel_size=(1, 1),              # kernel size
                   filters=in_channel * expansion,  # expand the channels by `expansion`
                   stride=1)
    # Depthwise convolution
    x = depthwise_conv_block(x, kernel_size=kernel_size, stride=stride)
    # Projection: 1x1 pointwise convolution lowers the channel count
    x = pointwise_conv_block(x, filters=out_channel)  # out_channel output feature maps
    # If stride=1 and the input and output shapes match, add a residual connection
    if stride == 1 and input_tensor.shape == x.shape:
        output = layers.Add()([input_tensor, x])
        return output
    # Otherwise (e.g. stride=2), return the pointwise convolution result directly
    else:
        return x
# (6) One MBConv stage consists of one (possibly) downsampling block (stride=2)
# followed by several basic blocks (stride=1); see the worked example after this function
def MBConv(input_tensor, expansion, kernel_size, filters, stride, num):
    # One downsampling block (downsampling may not be needed if stride=1)
    x = inverted_res_block(input_tensor, expansion, kernel_size, stride, out_channel=filters)
    # num-1 basic blocks; num is the total number of inverted_res_block blocks in this stage
    for _ in range(1, num):
        x = inverted_res_block(x, expansion, kernel_size, stride=1, out_channel=filters)
    return x  # output feature map of the MBConv stage
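# For example, the call used later in MnasNet,
#   MBConv(x, expansion=6, kernel_size=(3, 3), filters=24, stride=2, num=2),
# builds one stride-2 inverted_res_block (downsampling, so no residual connection)
# followed by one stride-1 inverted_res_block whose input and output shapes match,
# so that second block is residually connected.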
# SENet attention module
# (7) Squeeze-and-excitation (SE)
def squeeze_excitation(input_tensor):
    inputs = input_tensor                # keep a reference to the input feature map
    squeeze = inputs.shape[-1] // 2      # squeeze to half of the original channel count
    excitation = inputs.shape[-1]        # excite back to the original channel count
    # e.g. [416,416,24] ==> [None,24]
    x = layers.GlobalAveragePooling2D()(input_tensor)  # global average pooling
    # e.g. [None,24] ==> [None,12]
    x = layers.Dense(squeeze)(x)  # fully connected layer, halve the channels
    # activation, shape unchanged
    x = layers.ReLU()(x)
    # e.g. [None,12] ==> [None,24]
    x = layers.Dense(excitation)(x)  # fully connected layer, restore the original channel count
    # activation, shape unchanged
    x = tf.nn.sigmoid(x)
    # e.g. [None,24] ==> [1,1,24]
    x = layers.Reshape(target_shape=(1, 1, excitation))(x)
    # [416,416,24] * [1,1,24] ==> [416,416,24]
    # element-wise multiplication with broadcasting, shape unchanged (the `*` operator would also work)
    output = layers.Multiply()([inputs, x])
    return output
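# A quick shape check of the SE block above (a sketch; the 28x28x40 input is just an example):
#   se_in = keras.Input(shape=(28, 28, 40))
#   se_out = squeeze_excitation(se_in)   # squeeze to 20 units, excite back to 40; output shape (None, 28, 28, 40)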
# Inverted residual block with the SE attention module added
# (8) Inverted residual block (depthwise separable) with squeeze-and-excitation applied
def inverted_se_res_block(input_tensor, expansion, kernel_size, stride, out_channel):
    # Identical to inverted_res_block except for the extra SE layer
    # Number of input channels
    in_channel = keras.backend.int_shape(input_tensor)[-1]
    # 1x1 standard convolution, expand the channels by `expansion`
    x = conv_block(input_tensor, filters=in_channel * expansion,
                   kernel_size=(1, 1), stride=1)
    # Depthwise convolution, channel count unchanged
    x = depthwise_conv_block(x, kernel_size, stride)
    # SE module
    x = squeeze_excitation(x)
    # Pointwise 1x1 convolution, lower the channel count
    x = pointwise_conv_block(x, filters=out_channel)
    # If stride=1 and the input and output shapes match, add a residual connection
    if stride == 1 and input_tensor.shape == x.shape:
        output = layers.Add()([input_tensor, x])
        return output
    # Otherwise (e.g. stride=2), return the pointwise convolution result directly
    else:
        return x
# (9) One MBConv_SE stage consists of one (possibly) downsampling block (stride=2)
# followed by several basic blocks (stride=1)
def MBConv_SE(input_tensor, expansion, kernel_size, filters, stride, num):
    # One downsampling block (downsampling may not be needed if stride=1)
    x = inverted_se_res_block(input_tensor, expansion, kernel_size, stride, out_channel=filters)
    # num-1 basic blocks; num is the total number of inverted_se_res_block blocks in this stage
    for _ in range(1, num):
        x = inverted_se_res_block(x, expansion, kernel_size, stride=1, out_channel=filters)
    return x  # output feature map of the MBConv_SE stage
# (10) Build the backbone network
def MnasNet(input_shape, classes):
    # Network input tensor
    inputs = keras.Input(shape=input_shape)
    # [224,224,3] ==> [112,112,32]
    x = conv_block(inputs, 32, kernel_size=(3, 3), stride=2)
    # [112,112,32] ==> [112,112,16]
    x = sep_conv_block(x, kernel_size=(3, 3), stride=1, filters=16)
    # [112,112,16] ==> [56,56,24]
    x = MBConv(x, expansion=6, kernel_size=(3, 3), filters=24, stride=2, num=2)
    # [56,56,24] ==> [28,28,40]
    x = MBConv_SE(x, expansion=3, kernel_size=(5, 5), filters=40, stride=2, num=3)
    # [28,28,40] ==> [14,14,80]
    x = MBConv(x, expansion=6, kernel_size=(3, 3), filters=80, stride=2, num=4)
    # [14,14,80] ==> [14,14,112]
    x = MBConv_SE(x, expansion=6, kernel_size=(3, 3), filters=112, stride=1, num=2)
    # [14,14,112] ==> [7,7,160]
    x = MBConv_SE(x, expansion=6, kernel_size=(5, 5), filters=160, stride=2, num=3)
    # [7,7,160] ==> [7,7,320]
    x = MBConv(x, expansion=6, kernel_size=(3, 3), filters=320, stride=1, num=1)
    # One more standard convolution: [7,7,320] ==> [7,7,1280]
    x = conv_block(x, filters=1280, kernel_size=(1, 1), stride=1)
    # [7,7,1280] ==> [None,1280]
    x = layers.GlobalAveragePooling2D()(x)
    # [None,1280] ==> [None,classes]
    outputs = layers.Dense(classes, activation='softmax')(x)
    # Build the model
    model = Model(inputs, outputs)
    return model
# (11) Instantiate the model
if __name__ == '__main__':
    model = MnasNet(input_shape=[224, 224, 3], classes=1000)  # input image shape and number of classes
    # Print the network structure
    model.summary()
    plot_model(model, to_file='img/MnasNet.png', show_shapes=True)