follow comment

5b47f3c2 · dayhaha · 202184b0 · 5b47f3c2 · 5b47f3c2 · 202184b0
19 changed files
--- a/recognize_digits/.gitignore
+++ b/recognize_digits/.gitignore
@@ -2,3 +2,5 @@ data/raw_data
 data/train.list
 data/test.list
 *.log
+*.pyc
+plot.png
--- a/recognize_digits/README.md
+++ b/recognize_digits/README.md
--- a/recognize_digits/cnn_mnist.py
+++ b/recognize_digits/cnn_mnist.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer_config_helpers import *
-is_predict = get_config_arg("is_predict", bool, False)
-####################Data Configuration ##################
-if not is_predict:
-    data_dir = './data/'
-    define_py_data_sources2(
-        train_list=data_dir + 'train.list',
-        test_list=data_dir + 'test.list',
-        module='mnist_provider',
-        obj='process')
-######################Algorithm Configuration #############
-settings(
-    batch_size=128,
-    learning_rate=0.1 / 128.0,
-    learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * 128))
-#######################Network Configuration #############
-data_size = 1 * 28 * 28
-label_size = 10
-img = data_layer(name='pixel', size=data_size)
-# first conv layer
-conv_pool_1 = simple_img_conv_pool(
-    input=img,
-    filter_size=5,
-    num_filters=20,
-    num_channel=1,
-    pool_size=2,
-    pool_stride=2,
-    act=TanhActivation())
-# second conv layer
-conv_pool_2 = simple_img_conv_pool(
-    input=conv_pool_1,
-    filter_size=5,
-    num_filters=50,
-    num_channel=20,
-    pool_size=2,
-    pool_stride=2,
-    act=TanhActivation())
-# The first fully-connected layer
-fc1 = fc_layer(input=conv_pool_2, size=128, act=TanhActivation())
-# The softmax layer, note that the hidden size should be 10,
-# which is the number of unique digits
-predict = fc_layer(input=fc1, size=10, act=SoftmaxActivation())
-if not is_predict:
-    lbl = data_layer(name="label", size=label_size)
-    inputs(img, lbl)
-    outputs(classification_cost(input=predict, label=lbl))
-else:
-    outputs(predict)
--- a/recognize_digits/data/get_mnist_data.sh
+++ b/recognize_digits/data/get_mnist_data.sh
@@ -31,5 +31,5 @@ done
 cd $DIR
 rm -f *.list
-echo "./data/raw_data/train\n" > "$DIR/train.list"
+echo "./data/raw_data/train" > "$DIR/train.list"
-echo "./data/raw_data/t10k\n" > "$DIR/test.list"
+echo "./data/raw_data/t10k" > "$DIR/test.list"
--- a/recognize_digits/evaluate.py
+++ b/recognize_digits/evaluate.py
@@ -30,8 +30,6 @@ def get_best_pass(filename):
 filename = sys.argv[1]
 log = get_best_pass(filename)
-predict_error = math.sqrt(float(log[0])) / 2
 classification_accuracy = (1 - float(log[1])) * 100
-print 'Best pass is %s, error is %s, which means predict get error as %f' % (
+print 'Best pass is %s, testing Avgcost is %s' % (log[2], log[0])
-    log[2], log[0], predict_error)
 print 'The classification accuracy is %.2f%%' % classification_accuracy
--- a/recognize_digits/image/Conv_layer.png
+++ b/recognize_digits/image/Conv_layer.png
--- a/recognize_digits/image/cnn_train_log.png
+++ b/recognize_digits/image/cnn_train_log.png
--- a/recognize_digits/image/conv_layer.png
+++ b/recognize_digits/image/conv_layer.png
--- a/recognize_digits/image/Max_pooling.png
+++ b/recognize_digits/image/Max_pooling.png
--- a/recognize_digits/image/MLP.png
+++ b/recognize_digits/image/MLP.png
--- a/recognize_digits/image/mlp_train_log.png
+++ b/recognize_digits/image/mlp_train_log.png
--- a/recognize_digits/image/softmax_train_log.png
+++ b/recognize_digits/image/softmax_train_log.png
--- a/recognize_digits/load_data.py
+++ b/recognize_digits/load_data.py
@@ -14,34 +14,28 @@
 import numpy as np
 import matplotlib.pyplot as plt
 import random
+import struct
 def read_data(path, filename):
-    imgf = path + filename + "-images-idx3-ubyte"
+    with open(path + filename + "-images-idx3-ubyte",
-    labelf = path + filename + "-labels-idx1-ubyte"
+              "rb") as f:  # open picture file
-    f = open(imgf, "rb")
+        magic, n, rows, cols = struct.unpack(">IIII", f.read(16))
-    l = open(labelf, "rb")
+        images = np.fromfile(
+            f, 'ubyte',
-    f.read(16)
+            count=n * rows * cols).reshape(n, rows, cols).astype('float32')
-    l.read(8)
+    with open(path + filename + "-labels-idx1-ubyte",
-    # Define number of samples for train/test
+              "rb") as l:  # open label file
-    n = 60000 if "train" in filename else 10000
+        magic, n = struct.unpack(">II", l.read(8))
+        labels = np.fromfile(l, 'ubyte', count=n).astype("int")
-    rows = 28
-    cols = 28
-    images = np.fromfile(
-        f, 'ubyte',
-        count=n * rows * cols).reshape(n, rows, cols).astype('float32')
-    labels = np.fromfile(l, 'ubyte', count=n).astype("int")
    return images, labels
 if __name__ == "__main__":
-    train_images, train_labels = read_data("./raw_data/", "train")
+    train_images, train_labels = read_data("./data/raw_data/", "train")
-    test_images, test_labels = read_data("./raw_data/", "t10k")
+    test_images, test_labels = read_data("./data/raw_data/", "t10k")
    label_list = []
    for i in range(10):
        index = random.randint(0, train_images.shape[0] - 1)

--- a/recognize_digits/mlp_mnist.py
+++ b/recognize_digits/mlp_mnist.py
@@ -39,13 +39,53 @@ data_size = 1 * 28 * 28
 label_size = 10
 img = data_layer(name='pixel', size=data_size)
-# The first fully-connected layer
-hidden1 = fc_layer(input=img, size=128, act=ReluActivation())
+def softmax_regression(img):
-# The second fully-connected layer and the according activation function
+    predict = fc_layer(input=img, size=10, act=SoftmaxActivation())
-hidden2 = fc_layer(input=hidden1, size=64, act=ReluActivation())
+    return predict
-# The thrid fully-connected layer, note that the hidden size should be 10,
-# which is the number of unique digits
-predict = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
+def multilayer_perceptron(img):
+    # The first fully-connected layer
+    hidden1 = fc_layer(input=img, size=128, act=ReluActivation())
+    # The second fully-connected layer and the corresponding activation function
+    hidden2 = fc_layer(input=hidden1, size=64, act=ReluActivation())
+    # The third fully-connected layer; note that the hidden size should be 10,
+    # which is the number of unique digits
+    predict = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
+    return predict
+def convolutional_neural_network(img):
+    # first conv layer
+    conv_pool_1 = simple_img_conv_pool(
+        input=img,
+        filter_size=5,
+        num_filters=20,
+        num_channel=1,
+        pool_size=2,
+        pool_stride=2,
+        act=TanhActivation())
+    # second conv layer
+    conv_pool_2 = simple_img_conv_pool(
+        input=conv_pool_1,
+        filter_size=5,
+        num_filters=50,
+        num_channel=20,
+        pool_size=2,
+        pool_stride=2,
+        act=TanhActivation())
+    # The first fully-connected layer
+    fc1 = fc_layer(input=conv_pool_2, size=128, act=TanhActivation())
+    # The softmax layer, note that the hidden size should be 10,
+    # which is the number of unique digits
+    predict = fc_layer(input=fc1, size=10, act=SoftmaxActivation())
+    return predict
+predict = softmax_regression(img)
+#predict = multilayer_perceptron(img)
+#predict = convolutional_neural_network(img)
 if not is_predict:
    lbl = data_layer(name="label", size=label_size)

--- a/recognize_digits/mnist_provider.py
+++ b/recognize_digits/mnist_provider.py
@@ -13,36 +13,25 @@
 # limitations under the License.
 from paddle.trainer.PyDataProvider2 import *
-import numpy
+import numpy as np
+import struct
 # Define a py data provider
 @provider(
    input_types={'pixel': dense_vector(28 * 28),
-                 'label': integer_value(10)},
+                 'label': integer_value(10)})
-    cache=CacheType.CACHE_PASS_IN_MEM)
 def process(settings, filename):  # settings is not used currently.
-    imgf = filename + "-images-idx3-ubyte"
+    with open(filename + "-images-idx3-ubyte", "rb") as f:  # open picture file
-    labelf = filename + "-labels-idx1-ubyte"
+        magic, n, rows, cols = struct.unpack(">IIII", f.read(16))
-    f = open(imgf, "rb")
+        images = np.fromfile(
-    l = open(labelf, "rb")
+            f, 'ubyte',
+            count=n * rows * cols).reshape(n, rows, cols).astype('float32')
+        images = images / 255.0 * 2.0 - 1.0  # normalized to [-1,1]
-    f.read(16)
+    with open(filename + "-labels-idx1-ubyte", "rb") as l:  # open label file
-    l.read(8)
+        magic, n = struct.unpack(">II", l.read(8))
+        labels = np.fromfile(l, 'ubyte', count=n).astype("int")
-    # Define number of samples for train/test
-    if "train" in filename:
-        n = 60000
-    else:
-        n = 10000
-    images = numpy.fromfile(
-        f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
-    images = images / 255.0 * 2.0 - 1.0
-    labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
    for i in xrange(n):
        yield {"pixel": images[i, :], 'label': labels[i]}
-    f.close()
-    l.close()
--- a/recognize_digits/plot_error.py
+++ b/recognize_digits/plot_error.py
@@ -20,15 +20,33 @@ import sys
 def plot_log(filename):
    with open(filename, 'r') as f:
        text = f.read()
-        pattern = re.compile('Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
+        pattern = re.compile(
-                             re.S)
+            'AvgCost=([0-9]+\.[0-9]+).*?Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
+            re.S)
        results = re.findall(pattern, text)
-        cost, pass_ = zip(*results)
+        train_cost, test_cost, pass_ = zip(*results)
-        cost_float = map(float, cost)
+        train_cost_float = map(float, train_cost)
+        test_cost_float = map(float, test_cost)
        pass_int = map(int, pass_)
-        plt.plot(pass_int, cost_float, 'bo', pass_, cost_float, 'k')
+        plt.plot(pass_int, train_cost_float, 'red', label='Train')
+        plt.plot(pass_int, test_cost_float, 'g--', label='Test')
        plt.ylabel('AvgCost')
-        plt.xlabel('epoch')
+        plt.xlabel('Epoch')
+        # Now add the legend with some customizations.
+        legend = plt.legend(loc='upper right', shadow=False)
+        # The frame is a matplotlib.patches.Rectangle instance surrounding the legend.
+        frame = legend.get_frame()
+        frame.set_facecolor('0.90')
+        # Set the fontsize
+        for label in legend.get_texts():
+            label.set_fontsize('large')
+        for label in legend.get_lines():
+            label.set_linewidth(1.5)  # the legend line width
        plt.show()

--- a/recognize_digits/predict.py
+++ b/recognize_digits/predict.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Usage: predict.py -c CONF -d DATA -m MODEL
+"""Usage: predict.py -c CONF -d ./data/raw_data/  -m MODEL
 Arguments:
@@ -57,8 +57,10 @@ class Prediction():
        output = self.network.forwardTest(input)
        prob = output[0]["value"]
        predict = np.argsort(-prob)
+        print "Predicted probability of each digit:"
        print prob
-        print predict[0][0], self.labels[index]
+        print "Predict Number: %d" % predict[0][0]
+        print "Actual Number: %d" % self.labels[index]
 def main():

--- a/recognize_digits/softmax_mnist.py
+++ b/recognize_digits/softmax_mnist.py
-# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from paddle.trainer_config_helpers import *
-is_predict = get_config_arg("is_predict", bool, False)
-####################Data Configuration ##################
-if not is_predict:
-    data_dir = './data/'
-    define_py_data_sources2(
-        train_list=data_dir + 'train.list',
-        test_list=data_dir + 'test.list',
-        module='mnist_provider',
-        obj='process')
-######################Algorithm Configuration #############
-settings(
-    batch_size=128,
-    learning_rate=0.1 / 128.0,
-    learning_method=MomentumOptimizer(0.9),
-    regularization=L2Regularization(0.0005 * 128))
-#######################Network Configuration #############
-data_size = 1 * 28 * 28
-label_size = 10
-img = data_layer(name='pixel', size=data_size)
-# mlp is used
-predict = fc_layer(input=img, size=10, act=SoftmaxActivation())
-if not is_predict:
-    lbl = data_layer(name="label", size=label_size)
-    inputs(img, lbl)
-    outputs(classification_cost(input=predict, label=lbl))
-else:
-    outputs(predict)
--- a/recognize_digits/train.sh
+++ b/recognize_digits/train.sh
@@ -14,9 +14,9 @@
 # limitations under the License.
 set -e
-config=cnn_mnist.py
+config=mnist_model.py
-output=./cnn_mnist_model
+output=./softmax_mnist_model
-log=cnn_train.log
+log=softmax_train.log