Merge pull request #41 from Oneflow-Inc/hongshen

train VGG-16

Merge pull request #41 from Oneflow-Inc/hongshen
train VGG-16
d1f0ee1b · xiaomuchongwhs · GitHub · 64c06019 · 8919b54f · d1f0ee1b
5 changed files
--- a/Classification/cnns/README.md
+++ b/Classification/cnns/README.md
@@ -606,4 +606,33 @@ python3 cnn_benchmark/of_cnn_train_val.py \
 ```

 The top1 accuracy and the top5 accuracy are 54.762% and 78.1914%, respectively for our oneflow model after 90 epochs of training.
-For reference, the top1 accuracy and the top5 accuracy are 54.6% and 78.33%, respectively for the model from the tensorflow benchmarks after 90 epochs of training.
\ No newline at end of file
+For reference, the top1 accuracy and the top5 accuracy are 54.6% and 78.33%, respectively for the model from the tensorflow benchmarks after 90 epochs of training.
+
+#### 训练 VGG-16
+```
+export ENABLE_USER_OP=True
+rm -rf core.* 
+rm -rf ./output/snapshots/*
+DATA_ROOT=/dataset/ImageNet/ofrecord
+#Please change this to your data root.
+python3 cnn_benchmark/of_cnn_train_val.py \
+    --train_data_dir=$DATA_ROOT/train \
+    --val_data_dir=$DATA_ROOT/validation \
+    --train_data_part_num=256 \
+    --val_data_part_num=256 \
+    --num_nodes=1 \
+    --gpu_num_per_node=4 \
+    --model_update="momentum" \
+    --mom=0.9 \
+    --learning_rate=0.01 \
+    --loss_print_every_n_iter=10 \
+    --batch_size_per_device=128 \
+    --val_batch_size_per_device=128 \
+    --num_epoch=90 \
+    --use_fp16=false \
+    --use_boxing_v2=false \
+    --model="vgg" \
+```
+
+The top1 accuracy and the top5 accuracy are 69.3359% and 89.1370%, respectively for our oneflow model after 90 epochs of training.
+For reference, the top1 accuracy and the top5 accuracy are 71.5% and 89.9%, respectively for the model from the tensorflow benchmarks after 90 epochs of training.
--- a/Classification/cnns/job_function_util.py
+++ b/Classification/cnns/job_function_util.py
@@ -19,14 +19,8 @@ def get_train_config(args):
    train_config = _default_config(args)
    train_config.train.primary_lr(args.learning_rate)
    train_config.disable_all_reduce_sequence(False)
-    # train_config.cudnn_conv_enable_pseudo_half(True)
    train_config.all_reduce_group_min_mbyte(8)
    train_config.all_reduce_group_num(128)
-    # train_config.all_reduce_lazy_ratio(0)
-
-    # train_config.enable_nccl_hierarchical_all_reduce(True)
-    # train_config.cudnn_buf_limit_mbyte(2048)
-    # train_config.concurrency_width(2)

    if args.use_boxing_v2:
        train_config.use_boxing_v2(True)

--- a/Classification/cnns/of_cnn_train_val.py
+++ b/Classification/cnns/of_cnn_train_val.py
@@ -15,6 +15,8 @@ import resnet_model
 import vgg_model
 import alexnet_model

+os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
+

 parser = configs.get_parser()
 args = parser.parse_args()

--- a/Classification/cnns/optimizer_util.py
+++ b/Classification/cnns/optimizer_util.py
@@ -3,6 +3,7 @@ from __future__ import division
 from __future__ import print_function

 import math
+import pprint

 def add_optimizer_args(parser):
    group = parser.add_argument_group('optimizer parameters',
@@ -59,16 +60,7 @@ def gen_model_update_conf(args):
            "decay_batches": decay_batches, 
            "end_learning_rate": 0.00001,
        }}
-    
-    # weight decay
-    # if args.wd > 0:
-    #     assert args.wd < 1.0
-    #     model_update_conf['weight_decay_conf'] = {
-    #         "weight_decay_rate": args.wd, 
-    #         "excludes": {"pattern": ['_bn-']}
-    #     }

-    import pprint
    pprint.pprint(model_update_conf)
    return model_update_conf


--- a/Classification/cnns/vgg_model.py
+++ b/Classification/cnns/vgg_model.py
@@ -17,6 +17,8 @@ def _batch_norm(inputs, name=None, trainable=True):
        name=name,
    )

+def _get_regularizer():
+    return flow.regularizers.l2(0.00005)

 def conv2d_layer(
    name,
@@ -29,12 +31,16 @@ def conv2d_layer(
    dilation_rate=1,
    activation="Relu",
    use_bias=True,
-    weight_initializer=flow.variance_scaling_initializer(2, 'fan_out', 'random_normal',
-                                                         data_format="NCHW"),
+    weight_initializer=flow.variance_scaling_initializer(2, 'fan_out', 'random_normal', data_format="NCHW"),
    bias_initializer=flow.zeros_initializer(),
+
+    weight_regularizer=_get_regularizer(), # weight_decay
+    bias_regularizer=_get_regularizer(),
+
    bn=True,
-):
+):   
    weight_shape = (filters, input.shape[1], kernel_size, kernel_size)
+    print("weight_shape:{}".format(weight_shape))
    weight = flow.get_variable(
        name + "_weight",
        shape=weight_shape,
@@ -59,7 +65,7 @@ def conv2d_layer(
                output = _batch_norm(output, name + "_bn")
                output = flow.nn.relu(output)
            else:
-                output = flow.nn.relu(output)
+                output = flow.nn.relu(output)     
        else:
            raise NotImplementedError

@@ -85,12 +91,12 @@ def _conv_block(in_blob, index, filters, conv_times):
    return conv_block


-def vgg16bn(images, trainable=True, need_transpose=False, training=True, wd=1.0 / 32768, channel_last=False):
+def vgg16bn(images, trainable=True, need_transpose=False, training=True, wd=1.0/32768):
    if need_transpose:
        images = flow.transpose(images, name="transpose", perm=[0, 3, 1, 2])
    conv1 = _conv_block(images, 0, 64, 2)
    pool1 = flow.nn.max_pool2d(conv1[-1], 2, 2, "VALID", "NCHW", name="pool1")
-
+    
    conv2 = _conv_block(pool1, 2, 128, 2)
    pool2 = flow.nn.max_pool2d(conv2[-1], 2, 2, "VALID", "NCHW", name="pool2")

@@ -117,6 +123,8 @@ def vgg16bn(images, trainable=True, need_transpose=False, training=True, wd=1.0
        use_bias=True,
        kernel_initializer=_get_kernel_initializer(),
        bias_initializer=_get_bias_initializer(),
+        kernel_regularizer=_get_regularizer(),  # weight_decay
+        bias_regularizer=_get_regularizer(),
        trainable=trainable,
        name="dense0",
    )