Commit 8453aff4 authored by dangqingqing

update README.md

Parent aec1c2f0

This diff is collapsed.
@@ -37,5 +37,7 @@ def process(settings, file_list):
             images = batch['data']
             labels = batch['labels']
             for im, lab in zip(images, labels):
+                if settings.is_train and np.random.randint(2):
+                    im = im[:, :, ::-1]
                 im = im - settings.mean
                 yield {'image': im.astype('float32'), 'label': int(lab)}
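The two added lines introduce random horizontal flipping as training-time augmentation: `np.random.randint(2)` returns 0 or 1, so each training image is mirrored with probability 1/2, while test images pass through untouched. The slice `im[:, :, ::-1]` reverses the last axis of a channel-first array. A minimal standalone sketch of the same idea (the (3, 32, 32) CHW layout is an assumption inferred from the slicing pattern):

```python
import numpy as np

def random_flip(im, is_train):
    """Mirror a CHW image left-right with probability 0.5 during training."""
    if is_train and np.random.randint(2):
        im = im[:, :, ::-1]  # reverse the width axis -> horizontal flip
    return im

im = np.arange(3 * 32 * 32, dtype='float32').reshape(3, 32, 32)
out = random_flip(im, is_train=True)
assert out.shape == im.shape  # augmentation never changes the shape
```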
@@ -22,16 +22,16 @@ if not is_predict:
         test_list='data/test.list',
         module='dataprovider',
         obj='process',
-        args=args)
+        args={'mean_path': 'data/mean.meta'})
 
 settings(
     batch_size=128,
     learning_rate=0.1 / 128.0,
     learning_rate_decay_a=0.1,
-    learning_rate_decay_b=50000 * 100,
+    learning_rate_decay_b=50000 * 140,
     learning_rate_schedule='discexp',
     learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0001 * 128))
+    regularization=L2Regularization(0.0002 * 128))
 
 
 def conv_bn_layer(input,
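Two hyperparameters change here: the L2 weight decay doubles (0.0002, scaled by the batch size of 128), and the learning-rate step is stretched from 100 to 140 epochs' worth of samples. Under the 'discexp' (discrete exponential) schedule the rate drops by a factor of learning_rate_decay_a each time learning_rate_decay_b training samples have been processed; reading 50000 as the CIFAR-10 training-set size, the 10x decay now fires every 140 passes instead of every 100. A sketch of that step rule (this formula is my reading of 'discexp'; check the PaddlePaddle docs for the exact definition):

```python
import math

def discexp_lr(base_lr, decay_a, decay_b, samples_seen):
    # Discrete exponential decay: multiply by decay_a after every
    # decay_b training samples have been consumed.
    return base_lr * decay_a ** math.floor(samples_seen / decay_b)

base = 0.1 / 128.0
print(discexp_lr(base, 0.1, 50000 * 140, 50000 * 139))  # ~7.8e-4, before the drop
print(discexp_lr(base, 0.1, 50000 * 140, 50000 * 140))  # ~7.8e-5, after 140 passes
```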
@@ -55,6 +55,7 @@ def conv_bn_layer(input,
 
 def shortcut(ipt, n_in, n_out, stride):
     if n_in != n_out:
+        print("n_in != n_out")
         return conv_bn_layer(ipt, n_out, 1, stride, 0, LinearActivation())
     else:
         return ipt
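(The added print is just a debug trace.) The branch itself is the projection shortcut from the ResNet paper: when a block changes the channel count, the identity cannot be added elementwise to the conv branch, so a linear 1x1 convolution projects the input to the matching shape. A numpy analogue of that projection, writing the 1x1 conv as a matrix (shapes are illustrative):

```python
import numpy as np

def project(x, w):
    # x: (n_in, H, W) feature map; w: (n_out, n_in) weights of a 1x1 conv.
    # Contracting the channel axis maps n_in channels to n_out.
    return np.tensordot(w, x, axes=1)

x = np.random.rand(16, 8, 8)
w = np.random.rand(32, 16)
assert project(x, w).shape == (32, 8, 8)  # channels now match the block output
```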
@@ -65,7 +66,7 @@ def basicblock(ipt, ch_out, stride):
     tmp = conv_bn_layer(ipt, ch_out, 3, stride, 1)
     tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1, LinearActivation())
     short = shortcut(ipt, ch_in, ch_out, stride)
-    return addto_layer(input=[ipt, short], act=ReluActivation())
+    return addto_layer(input=[tmp, short], act=ReluActivation())
 
 
 def bottleneck(ipt, ch_out, stride):
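This is the substantive bug fix of the commit: a residual block computes ReLU(F(x) + shortcut(x)), but the old code summed the raw input `ipt` with `short` (itself derived from `ipt`), silently discarding the two stacked convolutions held in `tmp`. Passing `tmp` restores the intended residual form. Schematically:

```python
import numpy as np

relu = lambda x: np.maximum(x, 0)

def basicblock(x, F, shortcut):
    # Old (wrong): relu(x + shortcut(x)) -- the learned branch F(x) was dropped.
    # New (fixed): the block adds the residual F(x) onto the shortcut path.
    return relu(F(x) + shortcut(x))
```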
@@ -73,8 +74,8 @@ def bottleneck(ipt, ch_out, stride):
     tmp = conv_bn_layer(ipt, ch_out, 1, stride, 0)
     tmp = conv_bn_layer(tmp, ch_out, 3, 1, 1)
     tmp = conv_bn_layer(tmp, ch_out * 4, 1, 1, 0, LinearActivation())
-    short = shortcut(ipt, ch_in, ch_out, stride)
-    return addto_layer(input=[ipt, short], act=ReluActivation())
+    short = shortcut(ipt, ch_in, ch_out * 4, stride)
+    return addto_layer(input=[tmp, short], act=ReluActivation())
 
 
 def layer_warp(block_func, ipt, features, count, stride):
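The bottleneck fix is shape bookkeeping on top of the same `tmp`-vs-`ipt` correction: the third conv expands the branch to ch_out * 4 channels, so the projection shortcut must also be asked for ch_out * 4 (the old call requested ch_out, leaving the elementwise add shape-mismatched). A quick channel check:

```python
def bottleneck_channels(ch_in, ch_out):
    branch = ch_out * 4                            # 1x1 -> 3x3 -> 1x1, 4x expansion
    short = branch if ch_in != branch else ch_in   # projection or identity
    assert short == branch                         # elementwise add needs equal shapes
    return branch

print(bottleneck_channels(ch_in=256, ch_out=128))  # 512
```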
@@ -107,25 +108,25 @@ def resnet_imagenet(ipt, depth=50):
     return tmp
 
 
-def resnet_cifar10(ipt, depth=56):
-    assert ((depth - 2) % 6 == 0,
-            'depth should be one of 20, 32, 44, 56, 110, 1202')
+def resnet_cifar10(ipt, depth=32):
+    # depth should be one of 20, 32, 44, 56, 110, 1202
+    assert (depth - 2) % 6 == 0
     n = (depth - 2) / 6
     nStages = {16, 64, 128}
-    tmp = conv_bn_layer(
+    conv1 = conv_bn_layer(
         ipt, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
-    tmp = layer_warp(basicblock, tmp, 16, n, 1)
-    tmp = layer_warp(basicblock, tmp, 32, n, 2)
-    tmp = layer_warp(basicblock, tmp, 64, n, 2)
-    tmp = img_pool_layer(
-        input=tmp, pool_size=8, stride=1, pool_type=AvgPooling())
-    return tmp
+    res1 = layer_warp(basicblock, conv1, 16, n, 1)
+    res2 = layer_warp(basicblock, res1, 32, n, 2)
+    res3 = layer_warp(basicblock, res2, 64, n, 2)
+    pool = img_pool_layer(
+        input=res3, pool_size=8, stride=1, pool_type=AvgPooling())
+    return pool
 
 
 datadim = 3 * 32 * 32
 classdim = 10
 data = data_layer(name='image', size=datadim)
-net = resnet_cifar10(data, depth=56)
+net = resnet_cifar10(data, depth=32)
 out = fc_layer(input=net, size=10, act=SoftmaxActivation())
 if not is_predict:
     lbl = data_layer(name="label", size=classdim)
...
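Besides lowering the default depth from 56 to 32 and giving the stages readable names, this hunk fixes a classic Python pitfall: `assert (cond, msg)` asserts a two-element tuple, which is always truthy, so the old depth check could never fail. The rewrite keeps the bare condition and demotes the message to a comment. (Depth must be 6n + 2 because each of the three stages stacks n two-conv basic blocks on top of the first conv and the final classifier.) A demonstration of the pitfall:

```python
depth = 33  # not of the form 6n + 2

# Always passes: a non-empty tuple is truthy regardless of the condition.
assert ((depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202')

# Actually fires: the message belongs after the comma, outside the parentheses.
assert (depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202'
```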
@@ -47,18 +47,18 @@ def vgg_bn_drop(input):
             conv_batchnorm_drop_rate=dropouts,
             pool_type=MaxPooling())
 
-    tmp = conv_block(input, 64, 2, [0.3, 0], 3)
-    tmp = conv_block(tmp, 128, 2, [0.4, 0])
-    tmp = conv_block(tmp, 256, 3, [0.4, 0.4, 0])
-    tmp = conv_block(tmp, 512, 3, [0.4, 0.4, 0])
-    tmp = conv_block(tmp, 512, 3, [0.4, 0.4, 0])
-    tmp = dropout_layer(input=tmp, dropout_rate=0.5)
-    tmp = fc_layer(input=tmp, size=512, act=LinearActivation())
-    tmp = batch_norm_layer(
-        input=tmp, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5))
-    tmp = fc_layer(input=tmp, size=512, act=LinearActivation())
-    return tmp
+    conv1 = conv_block(input, 64, 2, [0.3, 0], 3)
+    conv2 = conv_block(conv1, 128, 2, [0.4, 0])
+    conv3 = conv_block(conv2, 256, 3, [0.4, 0.4, 0])
+    conv4 = conv_block(conv3, 512, 3, [0.4, 0.4, 0])
+    conv5 = conv_block(conv4, 512, 3, [0.4, 0.4, 0])
+    drop = dropout_layer(input=conv5, dropout_rate=0.5)
+    fc1 = fc_layer(input=drop, size=512, act=LinearActivation())
+    bn = batch_norm_layer(
+        input=fc1, act=ReluActivation(), layer_attr=ExtraAttr(drop_rate=0.5))
+    fc2 = fc_layer(input=bn, size=512, act=LinearActivation())
+    return fc2
 
 
 datadim = 3 * 32 * 32
...
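The vgg_bn_drop hunk is a behavior-preserving rename (`tmp` reuse replaced by stage names), which makes the topology easier to read: five conv blocks, then dropout and two 512-wide fc layers with batch norm in between. A shape walk-through, assuming each conv_block uses size-preserving 3x3 convs and ends with a 2x2 max pool (how the demo's img_conv_group is configured, to my knowledge):

```python
size, channels = 32, 3        # CIFAR-10 input: 3 x 32 x 32
for ch in [64, 128, 256, 512, 512]:
    channels = ch             # the convs in each block set the channel count...
    size //= 2                # ...and the closing 2x2 pool halves H and W
print(channels, size)         # 512 1 -> a 512-d vector feeds the fc stack
```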
@@ -25,5 +25,5 @@ paddle train \
     --trainer_count=4 \
     --log_period=100 \
     --num_passes=300 \
-    --save_dir=$output
+    --save_dir=$output \
     2>&1 | tee $log
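The script change only adds a line continuation, but it matters: without the trailing backslash the command ended at --save_dir=$output, and `2>&1 | tee $log` ran as a separate, empty pipeline, so the training output never reached the log file. A minimal reproduction in sh:

```sh
# Broken: the pipeline is its own (empty) command, so run.log ends up empty.
echo "training..."
2>&1 | tee run.log

# Fixed: the continuation keeps the redirection and tee attached to the command.
echo "training..." \
  2>&1 | tee run.log   # run.log now contains "training..."
```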