Commit 7d20aabc authored by 别团等shy哥发育

ShuffleNetV2 architecture reproduction

Parent 0f24332a
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, BatchNormalization, DepthwiseConv2D
from tensorflow.keras.layers import MaxPooling2D, GlobalAveragePooling2D, Dense
from tensorflow.keras.layers import ReLU, Concatenate, Input
from tensorflow.keras.models import Model
from plot_model import plot_model
# Standard convolution block: Conv + BatchNorm + ReLU
def conv_block(inputs, filters, kernel_size, stride=1):
    x = Conv2D(filters, kernel_size, stride, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    x = ReLU()(x)
    return x
# Depthwise convolution block: DWConv + BatchNorm
# In the paper every DWConv uses a 3*3 kernel; only the downsampling block uses stride=2.
# Note there is no ReLU here: following the paper, nonlinearity is applied only
# after the pointwise (1*1) convolutions, not after the depthwise convolution.
def depthwise_conv_block(inputs, kernel_size, stride=1):
    x = DepthwiseConv2D(kernel_size,
                        strides=stride,
                        padding='same',
                        use_bias=False  # no bias needed when BN follows
                        )(inputs)
    x = BatchNormalization()(x)
    return x
# Channel Shuffle block
# Permutes channels so information flows across groups.
# num_groups=2: the paper defaults to 2 groups, corresponding to the left
# branch's shortcut and the right branch's convolved features.
def channel_shuffle(inputs, num_groups=2):
    # Get the input feature map's shape: b = batch size, h/w = spatial size, c = channels
    b, h, w, c = inputs.shape
    # Reshape to [b, h, w, num_groups, c // num_groups]: the channel axis,
    # originally a flat vector of length c, becomes a matrix with num_groups rows
    x_reshaped = tf.reshape(inputs, [-1, h, w, num_groups, c // num_groups])
    # Transpose to [b, h, w, c // num_groups, num_groups]: the last two axes
    # swap, turning num_groups rows into num_groups columns
    x_transposed = tf.transpose(x_reshaped, [0, 1, 2, 4, 3])
    # Flatten back to [b, h, w, c]: the shortcut's channels and x's channels
    # are now interleaved along a flat channel axis
    output = tf.reshape(x_transposed, [-1, h, w, c])
    # Return the tensor with interleaved channels
    return output
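# Sanity check (hypothetical example, not part of the original file): with 4
# channels in 2 groups, the order [0, 1, 2, 3] becomes [0, 2, 1, 3], i.e. the
# two halves are interleaved.
# demo = tf.reshape(tf.range(4, dtype=tf.float32), [1, 1, 1, 4])
# print(tf.squeeze(channel_shuffle(demo)))  # -> [0. 2. 1. 3.]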
# Channel Split operation
def channel_split(inputs, num_splits=2):
    # Split the feature map into num_splits equal parts along the channel axis
    b1, b2 = tf.split(inputs, num_splits, axis=-1)
    return b1, b2
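# Quick check (hypothetical example): 4 channels split evenly into two halves.
# a, b = channel_split(tf.zeros([1, 2, 2, 4]))
# print(a.shape, b.shape)  # (1, 2, 2, 2) (1, 2, 2, 2)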
# ShuffleNetV2 basic block
# Spatial size and channel count are unchanged
def shuffle_block_s1(inputs, out_channels):
    # The paper splits the feature map in half along the channel axis;
    # each half goes through its own branch
    x1, x2 = channel_split(inputs)  # each branch now carries half the original channels
    # Right branch: 1*1 Conv + 3*3 DWConv + 1*1 Conv
    x2 = conv_block(x2, filters=out_channels // 2, kernel_size=(1, 1), stride=1)
    x2 = depthwise_conv_block(x2, kernel_size=(3, 3), stride=1)
    x2 = conv_block(x2, filters=out_channels // 2, kernel_size=(1, 1), stride=1)
    # Concatenate the two branches along the channel axis
    x = Concatenate(axis=-1)([x1, x2])
    # Channel Shuffle
    x = channel_shuffle(x)
    return x
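# Shape check (hypothetical example): the basic block preserves both the
# spatial size and the channel count.
# demo = tf.zeros([1, 28, 28, 116])
# print(shuffle_block_s1(demo, out_channels=116).shape)  # (1, 28, 28, 116)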
# ShuffleNetV2 downsampling block
# There is no Channel Split here, but the block still ends with a Concat,
# so the spatial size is halved while the channel count increases:
# left-branch channels + right-branch channels = the block's output channels.
def shuffle_block_s2(inputs, out_channels):
    shortcut = inputs
    in_channels = inputs.shape[-1]
    # Left branch: 3*3 DWConv (stride=2) + 1*1 Conv
    shortcut = depthwise_conv_block(shortcut, kernel_size=(3, 3), stride=2)  # halves the spatial size
    shortcut = conv_block(shortcut, in_channels, kernel_size=(1, 1), stride=1)
    # Right branch: 1*1 Conv + 3*3 DWConv (stride=2) + 1*1 Conv
    x = conv_block(inputs, in_channels // 2, kernel_size=(1, 1), stride=1)
    x = depthwise_conv_block(x, kernel_size=(3, 3), stride=2)
    # The right branch fills out the remaining channels:
    # left-branch channels + right-branch channels == out_channels
    x = conv_block(x, out_channels - in_channels, kernel_size=(1, 1), stride=1)
    # Stack the two branches along the channel axis; out.shape[-1] == out_channels
    out = Concatenate(axis=-1)([shortcut, x])
    out = channel_shuffle(out, 2)
    return out
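# Shape check (hypothetical example): with a (1, 56, 56, 24) input and
# out_channels=116, the left branch keeps 24 channels and the right branch
# adds 116 - 24 = 92, at half the resolution.
# demo = tf.zeros([1, 56, 56, 24])
# print(shuffle_block_s2(demo, out_channels=116).shape)  # (1, 28, 28, 116)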
# Stage block
def stage(inputs, out_channels, n):
    # In each stage the first block uses stride=2 (the downsampling block);
    # every other block uses stride=1 (the basic block).
    # Everything follows the paper; read the original text, otherwise you'll
    # never understand why it's built this way, heh.
    # Downsampling unit
    x = shuffle_block_s2(inputs, out_channels)
    for _ in range(n):
        x = shuffle_block_s1(x, out_channels)
    return x
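# Note: each stage runs n + 1 blocks in total (1 downsampling + n basic), so
# n = 3, 7, 3 below matches the 4/8/4 repeats of Stage2/3/4 in the paper's
# architecture table.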
# Full network
def ShuffleNet(input_shape, num_classes):
    # Build the input tensor
    inputs = Input(shape=input_shape)
    # Conv1 + MaxPool (the paper follows Conv1 with BN + ReLU, hence conv_block)
    x = conv_block(inputs, filters=24, kernel_size=(3, 3), stride=2)
    x = MaxPooling2D(pool_size=(3, 3), strides=2, padding='same')(x)
    # Stage2/3/4 with the ShuffleNetV2 1x channel widths
    x = stage(x, out_channels=116, n=3)
    x = stage(x, out_channels=232, n=7)
    x = stage(x, out_channels=464, n=3)
    # Conv5: 1*1 convolution up to 1024 channels (also BN + ReLU in the paper)
    x = conv_block(x, filters=1024, kernel_size=(1, 1), stride=1)
    x = GlobalAveragePooling2D()(x)
    x = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs, x)
    return model
model = ShuffleNet(input_shape=(224, 224, 3), num_classes=1000)
model.summary()
plot_model(model, to_file='img/ShuffleNetV2.png')
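# For reference (my estimate from the paper, not measured here): ShuffleNetV2 1x
# is reported at roughly 2.3M parameters, so model.summary() should land in
# that ballpark.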