Commit 7d20aabc authored by 别团等shy哥发育

ShuffleNetV2 architecture reproduction

Parent 0f24332a
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, DepthwiseConv2D
from tensorflow.keras.layers import MaxPooling2D, GlobalAveragePooling2D, Dense
from tensorflow.keras.layers import ReLU, Concatenate, Input
from tensorflow.keras.models import Model
from plot_model import plot_model
# Standard convolution block: Conv + BatchNorm + ReLU
def conv_block(inputs, filters, kernel_size, stride=1):
    x = Conv2D(filters, kernel_size, stride, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    return x
# Depthwise convolution block: DWConv + BatchNorm
# In the paper every DWConv uses a 3*3 kernel; only the downsampling block uses stride=2.
# Note there is no ReLU here: following the paper, nonlinearity is applied only
# after the pointwise (1*1) convolutions, not after the depthwise convolution.
def depthwise_conv_block(inputs, kernel_size, stride=1):
    x = DepthwiseConv2D(kernel_size,
                        strides=stride,
                        padding='same',
                        use_bias=False  # no bias needed when BN follows
                        )(inputs)
    x = BatchNormalization()(x)
    return x
# Channel Shuffle block
# Permutes channels so information flows across groups.
# num_groups=2: the paper defaults to 2 groups, corresponding to the left
# branch's shortcut and the right branch's convolved features.
def channel_shuffle(inputs, num_groups=2):
    # Get the input feature map's shape: b = batch size, h/w = spatial size, c = channels
    b, h, w, c = inputs.shape
    # Reshape to [b, h, w, num_groups, c // num_groups]: the channel axis,
    # originally a flat vector of length c, becomes a matrix with num_groups rows
    x_reshaped = tf.reshape(inputs, [-1, h, w, num_groups, c // num_groups])
    # Transpose to [b, h, w, c // num_groups, num_groups]: the last two axes
    # swap, turning num_groups rows into num_groups columns
    x_transposed = tf.transpose(x_reshaped, [0, 1, 2, 4, 3])
    # Flatten back to [b, h, w, c]: the shortcut's channels and x's channels
    # are now interleaved along a flat channel axis
    output = tf.reshape(x_transposed, [-1, h, w, c])
    # Return the tensor with interleaved channels
    return output
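# Sanity check (hypothetical example, not part of the original file): with 4
# channels in 2 groups, the order [0, 1, 2, 3] becomes [0, 2, 1, 3], i.e. the
# two halves are interleaved.
# demo = tf.reshape(tf.range(4, dtype=tf.float32), [1, 1, 1, 4])
# print(tf.squeeze(channel_shuffle(demo)))  # -> [0. 2. 1. 3.]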
# Channel Split operation
def channel_split(inputs, num_splits=2):
    # Split the feature map into num_splits equal parts along the channel axis
    b1, b2 = tf.split(inputs, num_splits, axis=-1)
    return b1, b2
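# Quick check (hypothetical example): 4 channels split evenly into two halves.
# a, b = channel_split(tf.zeros([1, 2, 2, 4]))
# print(a.shape, b.shape)  # (1, 2, 2, 2) (1, 2, 2, 2)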
# ShuffleNetV2 basic block
# Spatial size and channel count are unchanged
def shuffle_block_s1(inputs, out_channels):
    # The paper splits the feature map in half along the channel axis;
    # each half goes through its own branch
    x1, x2 = channel_split(inputs)  # each branch now carries half the original channels
    # Right branch: 1*1 Conv + 3*3 DWConv + 1*1 Conv
    x2 = conv_block(x2, filters=out_channels // 2, kernel_size=(1, 1), stride=1)
    x2 = depthwise_conv_block(x2, kernel_size=(3, 3), stride=1)
    x2 = conv_block(x2, filters=out_channels // 2, kernel_size=(1, 1), stride=1)
    # Concatenate the two branches along the channel axis
    x = Concatenate(axis=-1)([x1, x2])
    # Channel Shuffle
    x = channel_shuffle(x)
    return x
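# Shape check (hypothetical example): the basic block preserves both the
# spatial size and the channel count.
# demo = tf.zeros([1, 28, 28, 116])
# print(shuffle_block_s1(demo, out_channels=116).shape)  # (1, 28, 28, 116)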
# ShuffleNetV2 downsampling block
# There is no Channel Split here, but the block still ends with a Concat,
# so the spatial size is halved while the channel count increases:
# left-branch channels + right-branch channels = the block's output channels.
def shuffle_block_s2(inputs, out_channels):
    shortcut = inputs
    in_channels = inputs.shape[-1]
    # Left branch: 3*3 DWConv (stride=2) + 1*1 Conv
    shortcut = depthwise_conv_block(shortcut, kernel_size=(3, 3), stride=2)  # halves the spatial size
    shortcut = conv_block(shortcut, in_channels, kernel_size=(1, 1), stride=1)
    # Right branch: 1*1 Conv + 3*3 DWConv (stride=2) + 1*1 Conv
    x = conv_block(inputs, in_channels // 2, kernel_size=(1, 1), stride=1)
    x = depthwise_conv_block(x, kernel_size=(3, 3), stride=2)
    # The right branch fills out the remaining channels:
    # left-branch channels + right-branch channels == out_channels
    x = conv_block(x, out_channels - in_channels, kernel_size=(1, 1), stride=1)
    # Stack the two branches along the channel axis; out.shape[-1] == out_channels
    out = Concatenate(axis=-1)([shortcut, x])
    out = channel_shuffle(out, 2)
    return out
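# Shape check (hypothetical example): with a (1, 56, 56, 24) input and
# out_channels=116, the left branch keeps 24 channels and the right branch
# adds 116 - 24 = 92, at half the resolution.
# demo = tf.zeros([1, 56, 56, 24])
# print(shuffle_block_s2(demo, out_channels=116).shape)  # (1, 28, 28, 116)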
# Stage block
def stage(inputs, out_channels, n):
    # In each stage the first block uses stride=2 (the downsampling block);
    # every other block uses stride=1 (the basic block).
    # Everything follows the paper; read the original text, otherwise you'll
    # never understand why it's built this way, heh.
    # Downsampling unit
    x = shuffle_block_s2(inputs, out_channels)
    for _ in range(n):
        x = shuffle_block_s1(x, out_channels)
    return x
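# Note: each stage runs n + 1 blocks in total (1 downsampling + n basic), so
# n = 3, 7, 3 below matches the 4/8/4 repeats of Stage2/3/4 in the paper's
# architecture table.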
# Full network
def ShuffleNet(input_shape, num_classes):
    # Build the input tensor
    inputs = Input(shape=input_shape)
    # Conv1 + MaxPool (the paper follows Conv1 with BN + ReLU, hence conv_block)
    x = conv_block(inputs, filters=24, kernel_size=(3, 3), stride=2)
    x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x)
    # Stage2/3/4 with the ShuffleNetV2 1x channel widths
    x = stage(x, out_channels=116, n=3)
    x = stage(x, out_channels=232, n=7)
    x = stage(x, out_channels=464, n=3)
    # Conv5: 1*1 convolution up to 1024 channels (also BN + ReLU in the paper)
    x = conv_block(x, filters=1024, kernel_size=(1, 1), stride=1)
    x = GlobalAveragePooling2D()(x)
    x = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs, x)
    return model
model = ShuffleNet(input_shape=(224, 224, 3), num_classes=1000)
model.summary()
plot_model(model, to_file='img/ShuffleNetV2.png')
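# For reference (my estimate from the paper, not measured here): ShuffleNetV2 1x
# is reported at roughly 2.3M parameters, so model.summary() should land in
# that ballpark.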