Commit c866cb78 authored by 别团等shy哥发育

Reproducing the RegNetX architecture (CVPR 2020)

Parent be51173c
@@ -3,8 +3,10 @@ import tensorflow as tf
import os
import datetime
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers
from tensorflow.keras.layers import Dense, Dropout, Conv2D, MaxPool2D, Flatten
from tensorflow.keras.optimizers import Adam
# Allocate GPU memory on demand
@@ -40,7 +42,15 @@ train_datagen = ImageDataGenerator(
)
# Validation data generator
val_datagen = ImageDataGenerator(
    rotation_range=20,  # random rotation (degrees)
    width_shift_range=0.1,  # random horizontal shift
    height_shift_range=0.1,  # random vertical shift
    rescale=1 / 255,  # rescale pixel values to [0, 1]
    shear_range=10,  # random shear
    zoom_range=0.1,  # random zoom
    horizontal_flip=True,  # random horizontal flip
    brightness_range=(0.7, 1.3),  # brightness jitter
    fill_mode='nearest',  # fill mode for newly created pixels
)
# The test set only needs rescaling
test_datagen = ImageDataGenerator(
@@ -73,27 +83,38 @@ print(train_generator.class_indices)
# # x_train,y_train=next(it)
# # y_train
# AlexNet
# model = Sequential()
# # Convolutional layers
# model.add(
#     Conv2D(filters=96, kernel_size=(11, 11), strides=(4, 4), padding='valid', input_shape=(image_size, image_size, 3),
#            activation='relu'))
# model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='valid'))
# model.add(Conv2D(filters=256, kernel_size=(5, 5), strides=(1, 1), padding='same', activation='relu'))
# model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='valid'))
# model.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
# model.add(Conv2D(filters=384, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
# model.add(Conv2D(filters=256, kernel_size=(3, 3), strides=(1, 1), padding='same', activation='relu'))
# model.add(MaxPool2D(pool_size=(3, 3), strides=(2, 2), padding='valid'))
# # Fully connected layers
# model.add(Flatten())
# model.add(Dense(4096, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(4096, activation='relu'))
# model.add(Dropout(0.5))
# model.add(Dense(num_classes, activation='softmax'))
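# Transfer learning: a MobileNet backbone pretrained on ImageNet (include_top=False),
# followed by global average pooling and a softmax classifier over num_classes.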
model = MobileNet(
input_shape=(224, 224, 3), alpha=1.0, depth_multiplier=1, dropout=0.001,
include_top=False, weights='imagenet', input_tensor=None, pooling=None,
classes=num_classes
)
model = Sequential([
model,
layers.GlobalAveragePooling2D(),
Dense(num_classes, activation='softmax')
])
# Model summary
# model.summary()
# plot_model(model, to_file='AlexNet—鞋子分类.png', show_shapes=True)
# Learning-rate schedule: gradually decrease the learning rate
@@ -108,15 +129,17 @@ def adjust_learning_rate(epoch):
    else:
        lr = 1e-6
    return lr
# Optimizer
adam = Adam(lr=1e-4)
# Learning-rate decay callback
learningRateSchedular = LearningRateScheduler(adjust_learning_rate)
# TensorBoard
# Log directory name format
# logdir = os.path.join('logdir', datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
# tensorboar_callback = tf.keras.callbacks.TensorBoard(logdir)  # define the callback
# Compile with the optimizer, loss function, and accuracy metric
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
@@ -124,7 +147,7 @@ model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accurac
history = model.fit(x=train_generator,
                    epochs=epochs,
                    validation_data=val_generator,
                    callbacks=[learningRateSchedular])
# Plot training/validation accuracy and loss curves
acc = history.history['accuracy']
@@ -145,23 +168,15 @@ plt.title('Training and Validation Loss')
plt.legend()
plt.show()
# Model prediction
# adidass_(28).jpg Image_190.jpg
test_img = tf.keras.preprocessing.image.load_img('adidass_(28).jpg', target_size=(image_size, image_size))
print(test_img)
test_img = tf.keras.preprocessing.image.img_to_array(test_img)  # convert to a NumPy array
test_img = test_img / 255.0  # apply the same rescaling used by the data generators
print(test_img.shape)
test_img = tf.expand_dims(test_img, 0)  # add a batch dimension
print(test_img.shape)
preds = model.predict(test_img)  # predict
print(preds.shape)
print('Prediction:', preds)
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow.keras.models import Model
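# Per-variant RegNet configurations: per-stage depths, per-stage widths (output
# channels), the group width used by the grouped 3x3 convolutions, the default
# input resolution, and the block type ("X" = standard residual bottleneck
# block, "Y" = the same block with Squeeze-and-Excitation).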
MODEL_CONFIGS = {
"x002": {
"depths": [1, 1, 4, 7],
"widths": [24, 56, 152, 368],
"group_width": 8,
"default_size": 224,
"block_type": "X"
},
"x004": {
"depths": [1, 2, 7, 12],
"widths": [32, 64, 160, 384],
"group_width": 16,
"default_size": 224,
"block_type": "X"
},
"x006": {
"depths": [1, 3, 5, 7],
"widths": [48, 96, 240, 528],
"group_width": 24,
"default_size": 224,
"block_type": "X"
},
"x008": {
"depths": [1, 3, 7, 5],
"widths": [64, 128, 288, 672],
"group_width": 16,
"default_size": 224,
"block_type": "X"
},
"x016": {
"depths": [2, 4, 10, 2],
"widths": [72, 168, 408, 912],
"group_width": 24,
"default_size": 224,
"block_type": "X"
},
"x032": {
"depths": [2, 6, 15, 2],
"widths": [96, 192, 432, 1008],
"group_width": 48,
"default_size": 224,
"block_type": "X"
},
"x040": {
"depths": [2, 5, 14, 2],
"widths": [80, 240, 560, 1360],
"group_width": 40,
"default_size": 224,
"block_type": "X"
},
"x064": {
"depths": [2, 4, 10, 1],
"widths": [168, 392, 784, 1624],
"group_width": 56,
"default_size": 224,
"block_type": "X"
},
"x080": {
"depths": [2, 5, 15, 1],
"widths": [80, 240, 720, 1920],
"group_width": 120,
"default_size": 224,
"block_type": "X"
},
"x120": {
"depths": [2, 5, 11, 1],
"widths": [224, 448, 896, 2240],
"group_width": 112,
"default_size": 224,
"block_type": "X"
},
"x160": {
"depths": [2, 6, 13, 1],
"widths": [256, 512, 896, 2048],
"group_width": 128,
"default_size": 224,
"block_type": "X"
},
"x320": {
"depths": [2, 7, 13, 1],
"widths": [336, 672, 1344, 2520],
"group_width": 168,
"default_size": 224,
"block_type": "X"
},
"y002": {
"depths": [1, 1, 4, 7],
"widths": [24, 56, 152, 368],
"group_width": 8,
"default_size": 224,
"block_type": "Y"
},
"y004": {
"depths": [1, 3, 6, 6],
"widths": [48, 104, 208, 440],
"group_width": 8,
"default_size": 224,
"block_type": "Y"
},
"y006": {
"depths": [1, 3, 7, 4],
"widths": [48, 112, 256, 608],
"group_width": 16,
"default_size": 224,
"block_type": "Y"
},
"y008": {
"depths": [1, 3, 8, 2],
"widths": [64, 128, 320, 768],
"group_width": 16,
"default_size": 224,
"block_type": "Y"
},
"y016": {
"depths": [2, 6, 17, 2],
"widths": [48, 120, 336, 888],
"group_width": 24,
"default_size": 224,
"block_type": "Y"
},
"y032": {
"depths": [2, 5, 13, 1],
"widths": [72, 216, 576, 1512],
"group_width": 24,
"default_size": 224,
"block_type": "Y"
},
"y040": {
"depths": [2, 6, 12, 2],
"widths": [128, 192, 512, 1088],
"group_width": 64,
"default_size": 224,
"block_type": "Y"
},
"y064": {
"depths": [2, 7, 14, 2],
"widths": [144, 288, 576, 1296],
"group_width": 72,
"default_size": 224,
"block_type": "Y"
},
"y080": {
"depths": [2, 4, 10, 1],
"widths": [168, 448, 896, 2016],
"group_width": 56,
"default_size": 224,
"block_type": "Y"
},
"y120": {
"depths": [2, 5, 11, 1],
"widths": [224, 448, 896, 2240],
"group_width": 112,
"default_size": 224,
"block_type": "Y"
},
"y160": {
"depths": [2, 4, 11, 1],
"widths": [224, 448, 1232, 3024],
"group_width": 112,
"default_size": 224,
"block_type": "Y"
},
"y320": {
"depths": [2, 5, 12, 1],
"widths": [232, 696, 1392, 3712],
"group_width": 232,
"default_size": 224,
"block_type": "Y"
},
}
# PreStem: rescale inputs to [0, 1]
def PreStem(x, name=None):
x = layers.experimental.preprocessing.Rescaling(1. / 255.)(x)
return x
# Stem from the paper: stride-2 3x3 convolution with w0 = 32 output channels
def Stem(x, name=None):
x = layers.Conv2D(32,
(3, 3),
strides=2,
use_bias=False,
padding='same',
kernel_initializer='he_normal',
name=name + '_stem_conv')(x)
x = layers.BatchNormalization(
momentum=0.9, epsilon=1e-5, name=name + "_stem_bn")(x)
x = layers.ReLU(name=name + '_stem_relu')(x)
return x
# XBlock: 1x1 conv + 3x3 grouped conv + 1x1 conv (each conv followed by BN + ReLU)
def XBlock(inputs, filters_in, filters_out, group_width, stride=1, name=None):
    # number of groups for the grouped 3x3 convolution
groups = filters_out // group_width
    # When stride != 1, the skip branch needs a strided 1x1 conv (+ BN) so its shape matches the main branch
if stride != 1:
skip = layers.Conv2D(
filters_out,
(1, 1),
strides=stride,
use_bias=False,
kernel_initializer='he_normal',
name=name + '_skip_1x1')(inputs)
skip = layers.BatchNormalization(
momentum=0.9, epsilon=1e-5, name=name + "_skip_bn")(skip)
else:
skip = inputs
# build block
# conv_1x1_1
x = layers.Conv2D(
filters_out,
(1, 1),
use_bias=False,
kernel_initializer='he_normal',
name=name + '_conv_1x1_1')(inputs)
x = layers.BatchNormalization(
momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_1_bn")(x)
x = layers.ReLU(name=name + "_conv_1x1_1_relu")(x)
# group conv_3x3
x = layers.Conv2D(
filters_out,
(3, 3),
use_bias=False,
strides=stride,
groups=groups,
padding='same',
kernel_initializer='he_normal',
name=name + '_conv_3x3')(x)
x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=name + "_conv_3x3_bn")(x)
x = layers.ReLU(name=name + "_conv_3x3_relu")(x)
# conv_1x1_2
x = layers.Conv2D(
filters_out, (1, 1),
use_bias=False,
kernel_initializer="he_normal",
name=name + "_conv_1x1_2")(x)
x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=name + "_conv_1x1_2_bn")(x)
x = layers.ReLU(name=name + "_exit_relu")(x + skip)
return x
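
# A minimal YBlock sketch for the TODO in Stage below: per the RegNet paper,
# the Y block is the X block above plus a Squeeze-and-Excitation module after
# the grouped 3x3 conv. The SE width (filters_in // 4, i.e. se_ratio = 0.25)
# and the layer naming here are assumptions, not taken from this commit.
def YBlock(inputs, filters_in, filters_out, group_width, stride=1, name=None):
    groups = filters_out // group_width
    se_filters = max(1, filters_in // 4)
    # skip branch: strided 1x1 conv + BN when the spatial size changes
    if stride != 1:
        skip = layers.Conv2D(filters_out, (1, 1), strides=stride, use_bias=False,
                             kernel_initializer='he_normal', name=name + '_skip_1x1')(inputs)
        skip = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=name + '_skip_bn')(skip)
    else:
        skip = inputs
    # conv_1x1_1
    x = layers.Conv2D(filters_out, (1, 1), use_bias=False,
                      kernel_initializer='he_normal', name=name + '_conv_1x1_1')(inputs)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=name + '_conv_1x1_1_bn')(x)
    x = layers.ReLU(name=name + '_conv_1x1_1_relu')(x)
    # grouped conv_3x3
    x = layers.Conv2D(filters_out, (3, 3), strides=stride, groups=groups, padding='same',
                      use_bias=False, kernel_initializer='he_normal', name=name + '_conv_3x3')(x)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=name + '_conv_3x3_bn')(x)
    x = layers.ReLU(name=name + '_conv_3x3_relu')(x)
    # Squeeze-and-Excitation: global pool -> bottleneck 1x1 convs -> sigmoid channel gates
    se = layers.GlobalAveragePooling2D(name=name + '_se_pool')(x)
    se = layers.Reshape((1, 1, filters_out), name=name + '_se_reshape')(se)
    se = layers.Conv2D(se_filters, (1, 1), activation='relu', name=name + '_se_reduce')(se)
    se = layers.Conv2D(filters_out, (1, 1), activation='sigmoid', name=name + '_se_expand')(se)
    x = layers.Multiply(name=name + '_se_scale')([x, se])
    # conv_1x1_2
    x = layers.Conv2D(filters_out, (1, 1), use_bias=False,
                      kernel_initializer='he_normal', name=name + '_conv_1x1_2')(x)
    x = layers.BatchNormalization(momentum=0.9, epsilon=1e-5, name=name + '_conv_1x1_2_bn')(x)
    x = layers.ReLU(name=name + '_exit_relu')(x + skip)
    return x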
def Stage(inputs,
          block_type,  # must be one of "X", "Y", "Z"
          depth,  # stage depth: number of blocks to use
          group_width,  # group width for all blocks in this stage
          filters_in,  # input channels
          filters_out,  # output channels
          name=None):  # name prefix
x = inputs
if block_type == "X":
        # Per the paper, the first block of each stage uses stride 2
x = XBlock(
x,
filters_in,
filters_out,
group_width,
stride=2,
name=f"{name}_XBlock_0")
for i in range(1, depth):
            x = XBlock(x, filters_out, filters_out, group_width, name=f"{name}_XBlock_{i}")
    # TODO: YBlock, ZBlock
return x
def Head(x, num_classes=1000, classifier_activation='softmax', name=None):
    x = layers.GlobalAveragePooling2D(name=name + '_head_gap')(x)
    x = layers.Dense(num_classes, activation=classifier_activation, name=name + '_head_dense')(x)
    return x
def RegNet(depths,  # depth of each stage
           widths,  # block widths (output channels) per stage
           group_width,  # number of channels in each group
           block_type,  # one of "X", "Y", "Z"
           default_size,  # default input image size
           model_name='regnet',  # optional model name
           include_preprocessing=True,  # whether to include the PreStem rescaling
           include_top=True,  # whether to include the classification head
           weights='imagenet',
           input_tensor=None,
           input_shape=None,
           pooling=None,
           classes=1000,  # optional number of classes
           classifier_activation='softmax'):  # activation of the classifier head
img_input = layers.Input(shape=input_shape)
inputs = img_input
x = inputs
if include_preprocessing:
x = PreStem(x, name=model_name)
x = Stem(x, name=model_name)
in_channels = 32 # Output from Stem
for num_stage in range(4):
depth = depths[num_stage]
out_channels = widths[num_stage]
x = Stage(x,
block_type,
depth,
group_width,
in_channels,
out_channels,
name=model_name + '_Stage_' + str(num_stage))
in_channels = out_channels
if include_top:
        x = Head(x, num_classes=classes, classifier_activation=classifier_activation, name='head')
else:
if pooling == 'avg':
x = layers.GlobalAveragePooling2D()(x)
elif pooling == 'max':
            x = layers.GlobalMaxPooling2D()(x)
model = Model(inputs=inputs, outputs=x, name=model_name)
return model
'''
"x002": {
"depths": [1, 1, 4, 7],
"widths": [24, 56, 152, 368],
"group_width": 8,
"default_size": 224,
"block_type": "X"
}
'''
def RegNetX002(model_name='regnetx002',
include_top=True,
include_preprocessing=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000,
classifier_activation='softmax'):
return RegNet(
MODEL_CONFIGS['x002']['depths'],
MODEL_CONFIGS['x002']['widths'],
MODEL_CONFIGS['x002']['group_width'],
MODEL_CONFIGS['x002']['block_type'],
MODEL_CONFIGS['x002']['default_size'],
model_name=model_name,
include_top=include_top,
include_preprocessing=include_preprocessing,
weights=weights,
input_tensor=input_tensor,
input_shape=input_shape,
pooling=pooling,
classes=classes,
classifier_activation=classifier_activation
)
if __name__ == '__main__':
model = RegNetX002(input_shape=(224, 224, 3))
model.summary()
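    # A minimal sketch: build the backbone as a feature extractor
    # (include_top=False, pooling='avg') and attach a custom softmax head,
    # mirroring the MobileNet setup in the training script above.
    # num_classes = 3 is only an illustrative placeholder.
    num_classes = 3
    backbone = RegNetX002(include_top=False, pooling='avg', input_shape=(224, 224, 3))
    classifier = Model(backbone.input,
                       layers.Dense(num_classes, activation='softmax')(backbone.output),
                       name='regnetx002_classifier')
    classifier.summary()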