Commit 8bd85215 by 别团等shy哥发育

ShuffleNetV1 architecture reimplementation

ShuffleNetV1 flower recognition

Parent 432d3c73
import tensorflow as tf
from tensorflow.keras.layers import concatenate, Conv2D, Activation, BatchNormalization, DepthwiseConv2D
from tensorflow.keras.layers import add, AvgPool2D, MaxPool2D, GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from plot_model import plot_model
# Channel shuffle: exchanges information across groups
def channel_shuffle(inputs, num_groups):
    # Read the input feature map's shape. b: batch size; h, w: spatial size; c: number of channels
    b, h, w, c = inputs.shape
    # Reshape to [b, h, w, num_groups, c // num_groups]: the channel dimension, originally a
    # flat tensor of length c, becomes a matrix with num_groups rows and c // num_groups columns
    x_reshaped = tf.reshape(inputs, [-1, h, w, num_groups, c // num_groups])
    # Transpose to [b, h, w, c // num_groups, num_groups]: the last two dimensions swap,
    # so each row now holds one channel from every group
    x_transposed = tf.transpose(x_reshaped, [0, 1, 2, 4, 3])
    # Flatten the channel dimension back to [b, h, w, c]; channels from different groups are now interleaved
    output = tf.reshape(x_transposed, [-1, h, w, c])
    # Return the tensor with its channels shuffled across groups
    return output

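# Illustrative sanity check (not part of the original commit): with 2 groups,
# shuffling channels [0, 1, 2 | 3, 4, 5] should interleave the two halves.
_demo = tf.reshape(tf.range(6), [1, 1, 1, 6])
print(channel_shuffle(_demo, num_groups=2)[0, 0, 0].numpy())  # expected: [0 3 1 4 2 5]
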
# Grouped convolution: split channels into groups, convolve each group independently, then concatenate
def group_conv(inputs, filters, kernel, strides, num_groups):
    # Split the input into num_groups sub-tensors along the channel axis
    conv_side_layers_tmp = tf.split(inputs, num_groups, axis=3)
    conv_side_layers = []
    for layer in conv_side_layers_tmp:
        # Each group produces filters // num_groups output channels
        conv_side_layers.append(Conv2D(filters // num_groups, kernel, strides, padding='same')(layer))
    x = concatenate(conv_side_layers, axis=-1)
    return x

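# Illustrative check (not part of the original commit): a grouped 1x1 conv keeps the
# output shape of a plain conv while using only 1/num_groups of the weights
# (e.g. 6->6 channels with g=3: 3 * (2*2) = 12 weights instead of 36).
_gx = tf.zeros([1, 8, 8, 6])
print(group_conv(_gx, filters=6, kernel=1, strides=1, num_groups=3).shape)  # (1, 8, 8, 6)
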
# Plain convolution block: Conv + BatchNorm + optional ReLU
def conv(inputs, filters, kernel_size, stride=1, activation=False):
    x = Conv2D(filters, kernel_size, stride, padding='same', use_bias=False)(inputs)
    x = BatchNormalization()(x)
    if activation:
        x = Activation('relu')(x)
    return x

# DWConv: depthwise convolution block (in the paper every DWConv kernel is 3x3, with stride 1 or 2)
def depthwise_conv_bn(inputs, kernel_size, stride=1):
    x = DepthwiseConv2D(kernel_size=kernel_size,
                        strides=stride,
                        padding='same',
                        use_bias=False)(inputs)
    x = BatchNormalization()(x)
    return x

# ShuffleNetV1 basic unit (stride 1, residual Add)
def shuffleNetUnitA(inputs, num_groups):
    in_channels = inputs.shape[-1]
    out_channels = in_channels
    bottleneck_channels = out_channels // 4
    # 1x1 grouped convolution to reduce channels
    x = group_conv(inputs, bottleneck_channels, kernel=1, strides=1, num_groups=num_groups)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # Channel shuffle
    x = channel_shuffle(x, num_groups)
    # 3x3 DWConv
    x = depthwise_conv_bn(x, kernel_size=3, stride=1)
    # 1x1 grouped convolution to restore channels (shapes must match for the residual add)
    x = group_conv(x, out_channels, kernel=1, strides=1, num_groups=num_groups)
    x = BatchNormalization()(x)
    x = add([inputs, x])
    x = Activation('relu')(x)
    return x

# ShuffleNetV1 downsampling unit (stride 2, Concat)
def shuffleNetUnitB(inputs, out_channels, num_groups):
    in_channels = inputs.shape[-1]
    # The main branch and the shortcut branch are concatenated, so together they must
    # produce out_channels -- unlike the residual add in the basic unit above
    out_channels -= in_channels
    bottleneck_channels = out_channels // 4
    # (1) Main branch
    # 1x1 GConv
    x = group_conv(inputs, bottleneck_channels, kernel=1, strides=1, num_groups=num_groups)
    x = BatchNormalization()(x)
    x = Activation('relu')(x)
    # Channel shuffle
    x = channel_shuffle(x, num_groups)
    # 3x3 DWConv, stride=2
    x = depthwise_conv_bn(x, kernel_size=3, stride=2)
    # 1x1 GConv
    x = group_conv(x, out_channels, kernel=1, strides=1, num_groups=num_groups)
    x = BatchNormalization()(x)
    # (2) Shortcut branch: 3x3 average pooling, stride=2
    y = AvgPool2D(pool_size=3, strides=2, padding='same')(inputs)
    # Concatenate along the channel axis
    x = concatenate([y, x], axis=-1)
    x = Activation('relu')(x)
    return x

def stage(inputs, out_channels, num_groups, n):
    # The first block of each stage uses stride 2 (the downsampling unit);
    # the following n blocks use stride 1 (the basic unit).
    # This layout follows the stage table in the ShuffleNetV1 paper.
    x = shuffleNetUnitB(inputs, out_channels, num_groups)
    for _ in range(n):
        x = shuffleNetUnitA(x, num_groups)
    return x

# Backbone assembly
# input_shape: shape of the input images (renamed from `inputs` to avoid shadowing the input tensor)
# first_stage_channels: number of output channels of the first stage
# num_groups: number of groups, g in the paper
def ShuffleNet(input_shape, first_stage_channels, num_groups):
    # Build the network's input tensor
    inputs = tf.keras.Input(shape=input_shape)
    # The paper starts with a plain convolution followed by max pooling
    x = Conv2D(filters=24,
               kernel_size=3,
               strides=2,
               padding='same')(inputs)
    x = MaxPool2D(pool_size=3, strides=2, padding='same')(x)
    # Three stages; the first block of each stage uses stride 2.
    # Within a stage the other hyperparameters stay fixed, and each new stage doubles
    # the output channels (see the stage table in the paper).
    # n is the number of stride-1 basic units appended after the downsampling unit.
    x = stage(x, first_stage_channels, num_groups, n=3)
    x = stage(x, first_stage_channels * 2, num_groups, n=7)
    x = stage(x, first_stage_channels * 4, num_groups, n=3)
    x = GlobalAveragePooling2D()(x)
    # Some authors suggest leaving softmax out of the model and applying it via
    # from_logits=True at compile time for better numerical stability; worth trying later.
    x = Dense(1000, activation='softmax')(x)
    # Assemble the full model
    model = Model(inputs=inputs, outputs=x)
    return model

model = ShuffleNet(input_shape=[224, 224, 3], first_stage_channels=240, num_groups=3)
model.summary()
plot_model(model, to_file='img/ShuffleNet-V1.png', show_shapes=True)
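
# --- Illustrative fine-tuning sketch (not part of the original commit) ---
# A minimal flower-recognition setup matching the commit message. Assumptions:
# the dataset lives in 'flower_photos/' with one subdirectory per class
# (5 classes, as in the TensorFlow flowers dataset); the path, batch size,
# and epoch count below are placeholders, not tuned values.
backbone = ShuffleNet(input_shape=[224, 224, 3], first_stage_channels=240, num_groups=3)
features = backbone.layers[-2].output  # global-average-pooled features
outputs = Dense(5, activation='softmax')(features)  # 5 flower classes (assumed)
flower_model = Model(backbone.input, outputs)
flower_model.compile(optimizer='adam',
                     loss='sparse_categorical_crossentropy',
                     metrics=['accuracy'])
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    'flower_photos', validation_split=0.2, subset='training', seed=42,
    image_size=(224, 224), batch_size=32)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    'flower_photos', validation_split=0.2, subset='validation', seed=42,
    image_size=(224, 224), batch_size=32)
# Scale pixels to [0, 1] before feeding the network
train_ds = train_ds.map(lambda img, label: (img / 255.0, label))
val_ds = val_ds.map(lambda img, label: (img / 255.0, label))
flower_model.fit(train_ds, validation_data=val_ds, epochs=10)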
This diff is collapsed.
import tensorflow as tf
from tensorflow.keras import layers, Model

# Conv + BatchNorm + ReLU block
class ConvBNReLU(layers.Layer):
    def __init__(self,
                 filters: int = 1,
                 kernel_size: int = 1,
                 strides: int = 1,
                 padding: str = 'same',
                 **kwargs):
        super(ConvBNReLU, self).__init__(**kwargs)
        self.conv = layers.Conv2D(filters=filters,
                                  kernel_size=kernel_size,
                                  strides=strides,
                                  padding=padding,
                                  use_bias=False,
                                  kernel_regularizer=tf.keras.regularizers.l2(4e-5),
                                  name="conv1")
        self.bn = layers.BatchNormalization(momentum=0.9, name="bn")
        self.relu = layers.ReLU()

    def call(self, inputs, training=None, **kwargs):
        x = self.conv(inputs)
        x = self.bn(x, training=training)
        x = self.relu(x)
        return x

# Depthwise Conv + BatchNorm block (no activation, as in the paper)
class DWConvBN(layers.Layer):
    def __init__(self,
                 kernel_size: int = 3,
                 strides: int = 1,
                 padding: str = 'same',
                 **kwargs):
        super(DWConvBN, self).__init__(**kwargs)
        # Note: DepthwiseConv2D regularizes its weights via depthwise_regularizer
        # (kernel_regularizer would be silently ignored)
        self.dw_conv = layers.DepthwiseConv2D(kernel_size=kernel_size,
                                              strides=strides,
                                              padding=padding,
                                              use_bias=False,
                                              depthwise_regularizer=tf.keras.regularizers.l2(4e-5),
                                              name="dw1")
        self.bn = layers.BatchNormalization(momentum=0.9, name="bn")

    def call(self, inputs, training=None, **kwargs):
        x = self.dw_conv(inputs)
        x = self.bn(x, training=training)
        return x

# Channel shuffle as a reusable layer
class ChannelShuffle(layers.Layer):
    def __init__(self, shape, groups: int = 2, **kwargs):
        super(ChannelShuffle, self).__init__(**kwargs)
        batch_size, height, width, num_channels = shape
        assert num_channels % groups == 0
        channel_per_group = num_channels // groups
        # Reshape targets are tuples of integers, excluding the samples dimension (batch size)
        self.reshape1 = layers.Reshape((height, width, groups, channel_per_group))
        self.reshape2 = layers.Reshape((height, width, num_channels))

    def call(self, inputs, **kwargs):
        x = self.reshape1(inputs)
        x = tf.transpose(x, perm=[0, 1, 2, 4, 3])
        x = self.reshape2(x)
        return x

# Split the channel dimension into num_splits equal parts
class ChannelSplit(layers.Layer):
    def __init__(self, num_splits: int = 2, **kwargs):
        super(ChannelSplit, self).__init__(**kwargs)
        self.num_splits = num_splits

    def call(self, inputs, **kwargs):
        b1, b2 = tf.split(inputs,
                          num_or_size_splits=self.num_splits,
                          axis=-1)
        return b1, b2

# ShuffleNetV2 basic block (stride 1): channel split -> main branch -> concat -> shuffle
def shuffle_block_s1(inputs, output_c: int, stride: int, prefix: str):
    if stride != 1:
        raise ValueError("illegal stride value.")
    assert output_c % 2 == 0
    branch_c = output_c // 2
    x1, x2 = ChannelSplit(name=prefix + "/split")(inputs)

    # main branch
    x2 = ConvBNReLU(filters=branch_c, name=prefix + "/b2_conv1")(x2)
    x2 = DWConvBN(kernel_size=3, strides=stride, name=prefix + "/b2_dw1")(x2)
    x2 = ConvBNReLU(filters=branch_c, name=prefix + "/b2_conv2")(x2)

    x = layers.Concatenate(name=prefix + "/concat")([x1, x2])
    x = ChannelShuffle(x.shape, name=prefix + "/channelshuffle")(x)
    return x

# ShuffleNetV2 downsampling block (stride 2): both branches are processed, then concat -> shuffle
def shuffle_block_s2(inputs, output_c: int, stride: int, prefix: str):
    if stride != 2:
        raise ValueError("illegal stride value.")
    assert output_c % 2 == 0
    branch_c = output_c // 2

    # shortcut branch
    x1 = DWConvBN(kernel_size=3, strides=stride, name=prefix + "/b1_dw1")(inputs)
    x1 = ConvBNReLU(filters=branch_c, name=prefix + "/b1_conv1")(x1)

    # main branch
    x2 = ConvBNReLU(filters=branch_c, name=prefix + "/b2_conv1")(inputs)
    x2 = DWConvBN(kernel_size=3, strides=stride, name=prefix + "/b2_dw1")(x2)
    x2 = ConvBNReLU(filters=branch_c, name=prefix + "/b2_conv2")(x2)

    x = layers.Concatenate(name=prefix + "/concat")([x1, x2])
    x = ChannelShuffle(x.shape, name=prefix + "/channelshuffle")(x)
    return x

def shufflenet_v2(num_classes: int,
                  input_shape: tuple,
                  stages_repeats: list,
                  stages_out_channels: list):
    img_input = layers.Input(shape=input_shape)
    if len(stages_repeats) != 3:
        raise ValueError("expected stages_repeats as list of 3 positive ints")
    if len(stages_out_channels) != 5:
        raise ValueError("expected stages_out_channels as list of 5 positive ints")

    # Stem: 3x3 conv with stride 2, then 3x3 max pooling with stride 2
    x = ConvBNReLU(filters=stages_out_channels[0],
                   kernel_size=3,
                   strides=2,
                   name="conv1")(img_input)
    x = layers.MaxPooling2D(pool_size=(3, 3),
                            strides=2,
                            padding='same',
                            name="maxpool")(x)

    # Stages 2-4: each starts with one stride-2 block, followed by stride-1 blocks
    stage_name = ["stage{}".format(i) for i in [2, 3, 4]]
    for name, repeats, output_channels in zip(stage_name,
                                              stages_repeats,
                                              stages_out_channels[1:]):
        for i in range(repeats):
            if i == 0:
                x = shuffle_block_s2(x, output_c=output_channels, stride=2, prefix=name + "_{}".format(i))
            else:
                x = shuffle_block_s1(x, output_c=output_channels, stride=1, prefix=name + "_{}".format(i))

    # Final 1x1 conv, global average pooling, and classifier head
    x = ConvBNReLU(filters=stages_out_channels[-1], name="conv5")(x)
    x = layers.GlobalAveragePooling2D(name="globalpool")(x)
    x = layers.Dense(units=num_classes, name="fc")(x)
    x = layers.Softmax()(x)

    model = Model(img_input, x, name="ShuffleNetV2_1.0")
    return model

def shufflenet_v2_x1_0(num_classes=1000, input_shape=(224, 224, 3)):
    # Pretrained weights: https://pan.baidu.com/s/1M2mp98Si9eT9qT436DcdOw (access code: mhts)
    model = shufflenet_v2(num_classes=num_classes,
                          input_shape=input_shape,
                          stages_repeats=[4, 8, 4],
                          stages_out_channels=[24, 116, 232, 464, 1024])
    return model

def shufflenet_v2_x0_5(num_classes=1000, input_shape=(224, 224, 3)):
    model = shufflenet_v2(num_classes=num_classes,
                          input_shape=input_shape,
                          stages_repeats=[4, 8, 4],
                          stages_out_channels=[24, 48, 96, 192, 1024])
    return model

def shufflenet_v2_x2_0(num_classes=1000, input_shape=(224, 224, 3)):
    model = shufflenet_v2(num_classes=num_classes,
                          input_shape=input_shape,
                          stages_repeats=[4, 8, 4],
                          stages_out_channels=[24, 244, 488, 976, 2048])
    return model
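
# --- Illustrative usage (not part of the original commit) ---
# Build the 1.0x variant and run a dummy forward pass to check the output shape;
# the 5-class head here is an assumption matching the flower task.
if __name__ == '__main__':
    net = shufflenet_v2_x1_0(num_classes=5, input_shape=(224, 224, 3))
    net.summary()
    dummy = tf.random.normal([1, 224, 224, 3])
    print(net(dummy, training=False).shape)  # expected: (1, 5)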