diff --git "a/\347\273\217\345\205\270\347\275\221\347\273\234/ConvNext/ConvNext.py" "b/\347\273\217\345\205\270\347\275\221\347\273\234/ConvNext/ConvNext.py" new file mode 100644 index 0000000000000000000000000000000000000000..f2c63dee3f4010b16c4327b6856f8e2e3a7bc3c0 --- /dev/null +++ "b/\347\273\217\345\205\270\347\275\221\347\273\234/ConvNext/ConvNext.py" @@ -0,0 +1,254 @@ +import tensorflow as tf +import numpy as np +from tensorflow.keras import layers +from tensorflow.keras.models import Model + +MODEL_CONFIGS = { + "tiny": { + "depths": [3, 3, 9, 3], + "projection_dims": [96, 192, 384, 768], + "default_size": 224, + }, + "small": { + "depths": [3, 3, 27, 3], + "projection_dims": [96, 192, 384, 768], + "default_size": 224, + }, + "base": { + "depths": [3, 3, 27, 3], + "projection_dims": [128, 256, 512, 1024], + "default_size": 224, + }, + "large": { + "depths": [3, 3, 27, 3], + "projection_dims": [192, 384, 768, 1536], + "default_size": 224, + }, + "xlarge": { + "depths": [3, 3, 27, 3], + "projection_dims": [256, 512, 1024, 2048], + "default_size": 224, + }, +} + + +# Layer scale module +class LayerScale(layers.Layer): + def __init__(self, init_values, projection_dim, **kwargs): + super().__init__(**kwargs) + self.init_values = init_values + self.projection_dim = projection_dim + + def build(self, input_shape): + self.gamma = tf.Variable(self.init_values * tf.ones((self.projection_dim,))) + + def call(self, x): + return x * self.gamma + + def get_config(self): + config = super().get_config() + config.update( + { + "init_values": self.init_values, + "projection_dim": self.projection_dim + } + ) + return config + + +# 随机深度模块 +''' + drop_path_rate (float):丢弃路径的概率。应该在[0, 1]。 + 返回:残差路径丢弃或保留的张量。 +''' +class StochasticDepth(layers.Layer): + def __init__(self, drop_path_rate, **kwargs): + super().__init__(**kwargs) + self.drop_path_rate = drop_path_rate + + def call(self, x, training=None): + if training: + keep_prob = 1 - self.drop_path_rate + shape = (tf.shape(x)[0],) + (1,) * (len(tf.shape(x)) - 1) + random_tensor = keep_prob + tf.random.uniform(shape, 0, 1) + random_tensor = tf.floor(random_tensor) + return (x / keep_prob) * random_tensor + return x + + def get_config(self): + config = super().get_config() + config.update({"drop_path_rate": self.drop_path_rate}) + return config + + +# 使用ImageNet-1k均值和std对输入进行归一化 +# def PreStem(inputs, name=None): +# 源码中tensorflow的版本为2.9.1,这里没有layers.Normalization()这个接口 + + +# ConvNext Block +def ConvNextBlock(inputs, + projection_dim, # 卷积层的filters数量 + drop_path_rate=0.0, # 丢弃路径的概率。 + layer_scale_init_value=1e-6, + name=None): + x = inputs + # Depthwise卷积是分组卷积的一种特殊情况:当分组数=通道数 + x = layers.Conv2D(filters=projection_dim, + kernel_size=(7, 7), + padding='same', + groups=projection_dim, + name=name + '_depthwise_conv')(x) + x = layers.LayerNormalization(epsilon=1e-6, name=name + '_layernorm')(x) + x = layers.Dense(4 * projection_dim, name=name + '_pointwise_conv_1')(x) + x = layers.Activation('gelu', name=name + '_gelu')(x) + x = layers.Dense(projection_dim, name=name + '_pointwise_conv_2')(x) + + if layer_scale_init_value is not None: + # Layer scale module + x = LayerScale(layer_scale_init_value, projection_dim, name=name + '_layer_scale')(x) + if drop_path_rate: + # 随机深度模块 + layer = StochasticDepth(drop_path_rate, name=name + '_stochastic_depth') + else: + layer = layers.Activation('linear', name=name + '_identity') + + return layers.Add()([inputs, layer(x)]) + + +# ConvNext architecture +def ConvNext(depths, # tiny:[3,3,9,3] + projection_dims, # tiny:[96, 
+
+
+# Normalizes inputs with the ImageNet-1k mean and std.
+# def PreStem(inputs, name=None):
+# The upstream code targets TensorFlow 2.9.1; the layers.Normalization()
+# API is not available in the environment used here, so PreStem is omitted.
+
+
+# ConvNeXt block
+def ConvNextBlock(inputs,
+                  projection_dim,  # number of filters in the conv layers
+                  drop_path_rate=0.0,  # probability of dropping the residual path
+                  layer_scale_init_value=1e-6,
+                  name=None):
+    x = inputs
+    # A depthwise convolution is the special case of a grouped convolution
+    # where the number of groups equals the number of channels.
+    x = layers.Conv2D(filters=projection_dim,
+                      kernel_size=(7, 7),
+                      padding='same',
+                      groups=projection_dim,
+                      name=name + '_depthwise_conv')(x)
+    x = layers.LayerNormalization(epsilon=1e-6, name=name + '_layernorm')(x)
+    x = layers.Dense(4 * projection_dim, name=name + '_pointwise_conv_1')(x)
+    x = layers.Activation('gelu', name=name + '_gelu')(x)
+    x = layers.Dense(projection_dim, name=name + '_pointwise_conv_2')(x)
+
+    if layer_scale_init_value is not None:
+        # Layer scale module
+        x = LayerScale(layer_scale_init_value, projection_dim, name=name + '_layer_scale')(x)
+    if drop_path_rate:
+        # Stochastic depth module
+        layer = StochasticDepth(drop_path_rate, name=name + '_stochastic_depth')
+    else:
+        layer = layers.Activation('linear', name=name + '_identity')
+
+    return layers.Add()([inputs, layer(x)])
+
+
+# ConvNeXt architecture
+def ConvNext(depths,  # tiny: [3, 3, 9, 3]
+             projection_dims,  # tiny: [96, 192, 384, 768]
+             drop_path_rate=0.0,  # stochastic depth rate; if 0.0, stochastic depth is not used
+             layer_scale_init_value=1e-6,  # init value for layer scale
+             default_size=224,  # default input image size
+             model_name='convnext',  # optional name for the model
+             include_preprocessing=True,  # whether to include preprocessing
+             include_top=True,  # whether to include the classification head
+             weights=None,
+             input_tensor=None,
+             input_shape=None,
+             pooling=None,
+             classes=1000,  # number of classes
+             classifier_activation='softmax'):  # activation of the classifier head
+    # Use the provided tensor as the model input if given; otherwise create one.
+    if input_tensor is not None:
+        img_input = input_tensor
+    else:
+        img_input = layers.Input(shape=input_shape)
+
+    inputs = img_input
+    x = inputs
+
+    # if include_preprocessing:
+    #     x = PreStem(x, name=model_name)
+
+    # Stem block: 4x4 conv, projection_dims[0] filters, stride 4
+    stem = tf.keras.Sequential(
+        [
+            layers.Conv2D(projection_dims[0],
+                          kernel_size=(4, 4),
+                          strides=4,
+                          name=model_name + '_stem_conv'),
+            layers.LayerNormalization(epsilon=1e-6, name=model_name + '_stem_layernorm')
+        ],
+        name=model_name + '_stem'
+    )
+
+    # Downsampling blocks
+    downsample_layers = []
+    downsample_layers.append(stem)
+
+    num_downsample_layers = 3
+    for i in range(num_downsample_layers):
+        downsample_layer = tf.keras.Sequential(
+            [
+                layers.LayerNormalization(epsilon=1e-6, name=model_name + '_downsampling_layernorm_' + str(i)),
+                layers.Conv2D(projection_dims[i + 1],
+                              kernel_size=(2, 2),
+                              strides=2,
+                              name=model_name + '_downsampling_conv_' + str(i))
+            ],
+            name=model_name + '_downsampling_block_' + str(i)
+        )
+        downsample_layers.append(downsample_layer)
+
+    # Stochastic depth schedule.
+    # This is referred from the original ConvNeXt codebase:
+    # https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py#L86
+    depth_drop_rates = [
+        float(x) for x in np.linspace(0.0, drop_path_rate, sum(depths))
+    ]
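+    # Worked example (illustrative): with drop_path_rate=0.1 and the "tiny"
+    # depths [3, 3, 9, 3], np.linspace(0.0, 0.1, 18) yields 18 per-block rates
+    # growing linearly from 0.0 to 0.1, so deeper blocks are dropped more often.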
+
+    # First apply downsampling blocks and then apply ConvNeXt stages.
+    cur = 0
+
+    num_convnext_blocks = 4
+    for i in range(num_convnext_blocks):
+        x = downsample_layers[i](x)
+        for j in range(depths[i]):  # depths, e.g. tiny: [3, 3, 9, 3]
+            x = ConvNextBlock(x,
+                              projection_dim=projection_dims[i],
+                              drop_path_rate=depth_drop_rates[cur + j],
+                              layer_scale_init_value=layer_scale_init_value,
+                              name=model_name + f"_stage_{i}_block_{j}")
+        cur += depths[i]
+
+    if include_top:
+        x = layers.GlobalAveragePooling2D(name=model_name + '_head_gap')(x)
+        x = layers.LayerNormalization(epsilon=1e-6, name=model_name + '_head_layernorm')(x)
+        x = layers.Dense(classes,
+                         activation=classifier_activation,
+                         name=model_name + '_head_dense')(x)
+    else:
+        if pooling == 'avg':
+            x = layers.GlobalAveragePooling2D()(x)
+        elif pooling == 'max':
+            x = layers.GlobalMaxPooling2D()(x)
+        x = layers.LayerNormalization(epsilon=1e-6)(x)
+
+    model = Model(inputs=inputs, outputs=x, name=model_name)
+
+    # Load weights.
+    # if weights == "imagenet":
+    #     if include_top:
+    #         file_suffix = ".h5"
+    #         file_hash = WEIGHTS_HASHES[model_name][0]
+    #     else:
+    #         file_suffix = "_notop.h5"
+    #         file_hash = WEIGHTS_HASHES[model_name][1]
+    #     file_name = model_name + file_suffix
+    #     weights_path = utils.data_utils.get_file(
+    #         file_name,
+    #         BASE_WEIGHTS_PATH + file_name,
+    #         cache_subdir="models",
+    #         file_hash=file_hash,
+    #     )
+    #     model.load_weights(weights_path)
+    # elif weights is not None:
+    #     model.load_weights(weights)
+
+    return model
+
+
+def ConvNextTiny(model_name='convnext-tiny',
+                 include_top=True,
+                 include_preprocessing=True,
+                 weights=None,  # weight loading is commented out above, so default to None
+                 input_tensor=None,
+                 input_shape=None,
+                 pooling=None,
+                 classes=1000,
+                 classifier_activation='softmax'):
+    return ConvNext(depths=MODEL_CONFIGS['tiny']['depths'],
+                    projection_dims=MODEL_CONFIGS['tiny']['projection_dims'],
+                    drop_path_rate=0.0,
+                    layer_scale_init_value=1e-6,
+                    default_size=MODEL_CONFIGS['tiny']['default_size'],
+                    model_name=model_name,
+                    include_top=include_top,
+                    include_preprocessing=include_preprocessing,
+                    weights=weights,
+                    input_tensor=input_tensor,
+                    input_shape=input_shape,
+                    pooling=pooling,
+                    classes=classes,
+                    classifier_activation=classifier_activation)
+
+
+if __name__ == '__main__':
+    model = ConvNextTiny(input_shape=(224, 224, 3))
+    model.summary()
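+
+    # Hedged sanity check (added for illustration; not in the original script):
+    # run a dummy batch through the model to confirm the classifier head
+    # produces one probability per class.
+    dummy = tf.random.normal((1, 224, 224, 3))
+    preds = model(dummy, training=False)
+    print(preds.shape)  # expected: (1, 1000)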