from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import oneflow as flow

# Per-stage block counts and channel widths for ResNet-50 (stages res2..res5).
BLOCK_COUNTS = [3, 4, 6, 3]
BLOCK_FILTERS = [256, 512, 1024, 2048]
BLOCK_FILTERS_INNER = [64, 128, 256, 512]


def _conv2d(
    name,
    input,
    filters,
    kernel_size,
    strides=1,
    padding="SAME",
    data_format="NCHW",
    dilations=1,
    trainable=True,
    weight_initializer=flow.variance_scaling_initializer(
        2, "fan_in", "random_normal", data_format="NCHW"
    ),
    weight_regularizer=flow.regularizers.l2(1.0 / 32768),
):
    weight = flow.get_variable(
        name + "-weight",
        shape=(filters, input.shape[1], kernel_size, kernel_size),
        dtype=input.dtype,
        initializer=weight_initializer,
        regularizer=weight_regularizer,
        model_name="weight",
        trainable=trainable,
    )
    return flow.nn.conv2d(
        input, weight, strides, padding, data_format, dilations, name=name
    )


def _batch_norm(inputs, name=None, trainable=True):
    return flow.layers.batch_normalization(
        inputs=inputs,
        axis=1,
        momentum=0.9,  # some variants use 0.997
        epsilon=1.001e-5,
        center=True,
        scale=True,
        trainable=trainable,
        name=name,
    )


def conv2d_affine(input, name, filters, kernel_size, strides, activation=None):
    # input data_format must be NCHW; cannot be checked here
    padding = "SAME" if strides > 1 or kernel_size > 1 else "VALID"
    output = _conv2d(name, input, filters, kernel_size, strides, padding)
    output = _batch_norm(output, name + "_bn")
    if activation == "Relu":
        output = flow.keras.activations.relu(output)

    return output


def bottleneck_transformation(input, block_name, filters, filters_inner, strides):
    # 1x1 reduce -> 3x3 (carries the stride) -> 1x1 expand
    a = conv2d_affine(
        input, block_name + "_branch2a", filters_inner, 1, 1, activation="Relu"
    )

    b = conv2d_affine(
        a, block_name + "_branch2b", filters_inner, 3, strides, activation="Relu"
    )

    c = conv2d_affine(b, block_name + "_branch2c", filters, 1, 1)

    return c


def residual_block(input, block_name, filters, filters_inner, strides_init):
    # Project the shortcut whenever the spatial size or channel count changes.
    if strides_init != 1 or block_name == "res2_0":
        shortcut = conv2d_affine(
            input, block_name + "_branch1", filters, 1, strides_init
        )
    else:
        shortcut = input

    bottleneck = bottleneck_transformation(
        input, block_name, filters, filters_inner, strides_init
    )

    return flow.keras.activations.relu(bottleneck + shortcut)


def residual_stage(input, stage_name, counts, filters, filters_inner, stride_init=2):
    output = input
    for i in range(counts):
        block_name = "%s_%d" % (stage_name, i)
        output = residual_block(
            output, block_name, filters, filters_inner, stride_init if i == 0 else 1
        )

    return output


def resnet_conv_x_body(input, on_stage_end=lambda x: x):
    output = input
    for i, (counts, filters, filters_inner) in enumerate(
        zip(BLOCK_COUNTS, BLOCK_FILTERS, BLOCK_FILTERS_INNER)
    ):
        stage_name = "res%d" % (i + 2)
        output = residual_stage(
            output, stage_name, counts, filters, filters_inner, 1 if i == 0 else 2
        )
        on_stage_end(output)

    return output


def resnet_stem(input):
    conv1 = _conv2d("conv1", input, 64, 7, 2)
    conv1_bn = flow.keras.activations.relu(_batch_norm(conv1, "conv1_bn"))
    pool1 = flow.nn.max_pool2d(
        conv1_bn,
        ksize=3,
        strides=2,
        padding="VALID",
        data_format="NCHW",
        name="pool1",
    )
    return pool1


def resnet50(images, trainable=True, need_transpose=False):
    # note: images.shape = (N, C, H, W) in cc's new dataloader; the transpose
    # is no longer needed unless the input arrives as NHWC.
    if need_transpose:
        images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2])

    with flow.deprecated.variable_scope("Resnet"):
        stem = resnet_stem(images)
        body = resnet_conv_x_body(stem, lambda x: x)
        pool5 = flow.nn.avg_pool2d(
            body,
            ksize=7,
            strides=1,
            padding="VALID",
            data_format="NCHW",
            name="pool5",
        )

        fc1001 = flow.layers.dense(
            flow.reshape(pool5, (pool5.shape[0], -1)),
            units=1000,
            use_bias=True,
            kernel_initializer=flow.variance_scaling_initializer(
                2, "fan_in", "random_normal"
            ),
            # kernel_initializer=flow.xavier_uniform_initializer(),
            bias_initializer=flow.zeros_initializer(),
            kernel_regularizer=flow.regularizers.l2(1.0 / 32768),
            trainable=trainable,
            name="fc1001",
        )

    return fc1001
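
# ---------------------------------------------------------------------------
# Usage sketch (illustrative, not part of the original model definition).
# A minimal example of wiring resnet50 into a training job with the legacy
# lazy-mode OneFlow API this file targets; it assumes flow.function,
# flow.FixedTensorDef, the flow.config.train.* knobs, and flow.losses.add_loss
# are available in that API vintage. Batch size, input shape, learning rate,
# and the random data below are arbitrary choices for demonstration.
if __name__ == "__main__":
    import numpy as np

    BATCH_SIZE = 8  # illustrative only

    @flow.function
    def train_job(
        images=flow.FixedTensorDef((BATCH_SIZE, 3, 224, 224), dtype=flow.float),
        labels=flow.FixedTensorDef((BATCH_SIZE,), dtype=flow.int32),
    ):
        # Plain SGD with a fixed learning rate; swap in your own update conf.
        flow.config.train.primary_lr(0.1)
        flow.config.train.model_update_conf(dict(naive_conf={}))

        logits = resnet50(images, trainable=True)
        loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits, name="softmax_loss"
        )
        flow.losses.add_loss(loss)
        return loss

    # Drive one step with random data just to show the call pattern.
    images = np.random.rand(BATCH_SIZE, 3, 224, 224).astype(np.float32)
    labels = np.random.randint(0, 1000, size=(BATCH_SIZE,)).astype(np.int32)
    loss = train_job(images, labels).get()
    print("loss:", loss.mean())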