diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d902e6da25fd9bb4b0f2729a9cb7b90834f60307..2e6aebbd9a294bd3cc8c253b4133f8669b2dd400 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -24,7 +24,7 @@ files: \.md$ - id: remove-tabs files: \.md$ -- repo: git://github.com/reyoung/pre-commit-hooks-jinja-compile.git +- repo: https://github.com/reyoung/pre-commit-hooks-jinja-compile.git sha: 85ad800cbc9c60a64230d60971aa9576fd57e508 hooks: - id: convert-jinja2-into-html diff --git a/recognize_digits/data/get_mnist_data.sh b/recognize_digits/data/get_mnist_data.sh deleted file mode 100755 index 8d5cf179a940be06288d283e8a783b28d038acad..0000000000000000000000000000000000000000 --- a/recognize_digits/data/get_mnist_data.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/env sh -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# This scripts downloads the mnist data and unzips it. -set -e -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -rm -rf "$DIR/raw_data" -mkdir "$DIR/raw_data" -cd "$DIR/raw_data" - -echo "Downloading..." - -for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte -do - if [ ! -e $fname ]; then - wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz - gunzip ${fname}.gz - fi -done - -cd $DIR -rm -f *.list -echo "./data/raw_data/train" > "$DIR/train.list" -echo "./data/raw_data/t10k" > "$DIR/test.list" diff --git a/recognize_digits/evaluate.py b/recognize_digits/evaluate.py deleted file mode 100755 index b91467e242e83fe47b910c44122acc26209dcaec..0000000000000000000000000000000000000000 --- a/recognize_digits/evaluate.py +++ /dev/null @@ -1,35 +0,0 @@ -#!/usr/bin/python -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import sys -import re -import math - - -def get_best_pass(filename): - with open(filename, 'r') as f: - text = f.read() - pattern = re.compile( - 'Test.*? cost=([0-9]+\.[0-9]+).*?classification_error_evaluator=([0-9]+\.[0-9]+).*?pass-([0-9]+)', - re.S) - results = re.findall(pattern, text) - sorted_results = sorted(results, key=lambda result: float(result[0])) - return sorted_results[0] - - -filename = sys.argv[1] -log = get_best_pass(filename) -classification_accuracy = (1 - float(log[1])) * 100 -print 'Best pass is %s, testing Avgcost is %s' % (log[2], log[0]) -print 'The classification accuracy is %.2f%%' % classification_accuracy diff --git a/recognize_digits/load_data.py b/recognize_digits/load_data.py deleted file mode 100644 index a3055a591ee897afccc1e56c4f8abde5b274e93f..0000000000000000000000000000000000000000 --- a/recognize_digits/load_data.py +++ /dev/null @@ -1,47 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import matplotlib.pyplot as plt -import random -import struct - - -def read_data(path, filename): - with open(path + filename + "-images-idx3-ubyte", - "rb") as f: # open picture file - magic, n, rows, cols = struct.unpack(">IIII", f.read(16)) - images = np.fromfile( - f, 'ubyte', - count=n * rows * cols).reshape(n, rows, cols).astype('float32') - - with open(path + filename + "-labels-idx1-ubyte", - "rb") as l: # open label file - magic, n = struct.unpack(">II", l.read(8)) - labels = np.fromfile(l, 'ubyte', count=n).astype("int") - - return images, labels - - -if __name__ == "__main__": - train_images, train_labels = read_data("./data/raw_data/", "train") - test_images, test_labels = read_data("./data/raw_data/", "t10k") - label_list = [] - for i in range(10): - index = random.randint(0, train_images.shape[0] - 1) - label_list.append(train_labels[index]) - plt.subplot(1, 10, i + 1) - plt.imshow(train_images[index], cmap="Greys_r") - plt.axis('off') - print('label: %s' % (label_list, )) - plt.show() diff --git a/recognize_digits/mnist_model.py b/recognize_digits/mnist_model.py deleted file mode 100644 index 4bece1bbe5988c0acc6341d3fb1a6b81c3dcbed7..0000000000000000000000000000000000000000 --- a/recognize_digits/mnist_model.py +++ /dev/null @@ -1,95 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer_config_helpers import * - -is_predict = get_config_arg("is_predict", bool, False) - -####################Data Configuration ################## - -if not is_predict: - data_dir = './data/' - define_py_data_sources2( - train_list=data_dir + 'train.list', - test_list=data_dir + 'test.list', - module='mnist_provider', - obj='process') - -######################Algorithm Configuration ############# -settings( - batch_size=128, - learning_rate=0.1 / 128.0, - learning_method=MomentumOptimizer(0.9), - regularization=L2Regularization(0.0005 * 128)) - -#######################Network Configuration ############# - -data_size = 1 * 28 * 28 -label_size = 10 -img = data_layer(name='pixel', size=data_size) - - -def softmax_regression(img): - predict = fc_layer(input=img, size=10, act=SoftmaxActivation()) - return predict - - -def multilayer_perceptron(img): - # The first fully-connected layer - hidden1 = fc_layer(input=img, size=128, act=ReluActivation()) - # The second fully-connected layer and the according activation function - hidden2 = fc_layer(input=hidden1, size=64, act=ReluActivation()) - # The thrid fully-connected layer, note that the hidden size should be 10, - # which is the number of unique digits - predict = fc_layer(input=hidden2, size=10, act=SoftmaxActivation()) - return predict - - -def convolutional_neural_network(img): - # first conv layer - conv_pool_1 = simple_img_conv_pool( - input=img, - filter_size=5, - num_filters=20, - num_channel=1, - pool_size=2, - pool_stride=2, - act=TanhActivation()) - # second conv layer - conv_pool_2 = simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=50, - num_channel=20, - pool_size=2, - pool_stride=2, - act=TanhActivation()) - # The first fully-connected layer - fc1 = fc_layer(input=conv_pool_2, size=128, act=TanhActivation()) - # The softmax layer, note that the hidden size should be 10, - # which is the number of unique digits - predict = fc_layer(input=fc1, size=10, act=SoftmaxActivation()) - return predict - - -predict = softmax_regression(img) -#predict = multilayer_perceptron(img) -#predict = convolutional_neural_network(img) - -if not is_predict: - lbl = data_layer(name="label", size=label_size) - inputs(img, lbl) - outputs(classification_cost(input=predict, label=lbl)) -else: - outputs(predict) diff --git a/recognize_digits/mnist_provider.py b/recognize_digits/mnist_provider.py deleted file mode 100644 index b6f1d9662ed46aa18030543246b096fdc2e892cc..0000000000000000000000000000000000000000 --- a/recognize_digits/mnist_provider.py +++ /dev/null @@ -1,37 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from paddle.trainer.PyDataProvider2 import * -import numpy as np -import struct - - -# Define a py data provider -@provider( - input_types={'pixel': dense_vector(28 * 28), - 'label': integer_value(10)}) -def process(settings, filename): # settings is not used currently. - with open(filename + "-images-idx3-ubyte", "rb") as f: # open picture file - magic, n, rows, cols = struct.unpack(">IIII", f.read(16)) - images = np.fromfile( - f, 'ubyte', - count=n * rows * cols).reshape(n, rows, cols).astype('float32') - images = images / 255.0 * 2.0 - 1.0 # normalized to [-1,1] - - with open(filename + "-labels-idx1-ubyte", "rb") as l: # open label file - magic, n = struct.unpack(">II", l.read(8)) - labels = np.fromfile(l, 'ubyte', count=n).astype("int") - - for i in xrange(n): - yield {"pixel": images[i, :], 'label': labels[i]} diff --git a/recognize_digits/plot_cost.py b/recognize_digits/plot_cost.py deleted file mode 100644 index 1f79e835f6ccb282102a97699ae9c08fa5fd3aae..0000000000000000000000000000000000000000 --- a/recognize_digits/plot_cost.py +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import matplotlib.pyplot as plt -import re -import sys - - -def plot_log(filename): - with open(filename, 'r') as f: - text = f.read() - pattern = re.compile( - 'AvgCost=([0-9]+\.[0-9]+).*?Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)', - re.S) - results = re.findall(pattern, text) - train_cost, test_cost, pass_ = zip(*results) - train_cost_float = map(float, train_cost) - test_cost_float = map(float, test_cost) - pass_int = map(int, pass_) - plt.plot(pass_int, train_cost_float, 'red', label='Train') - plt.plot(pass_int, test_cost_float, 'g--', label='Test') - plt.ylabel('AvgCost') - plt.xlabel('Epoch') - - # Now add the legend with some customizations. - legend = plt.legend(loc='upper right', shadow=False) - - # The frame is matplotlib.patches.Rectangle instance surrounding the legend. - frame = legend.get_frame() - frame.set_facecolor('0.90') - - # Set the fontsize - for label in legend.get_texts(): - label.set_fontsize('large') - - for label in legend.get_lines(): - label.set_linewidth(1.5) # the legend line width - - plt.show() - - -if __name__ == '__main__': - plot_log(sys.argv[1]) diff --git a/recognize_digits/predict.py b/recognize_digits/predict.py deleted file mode 100644 index 0a6c87bf1a1b982141fd9924f33532ba77e37974..0000000000000000000000000000000000000000 --- a/recognize_digits/predict.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Usage: predict.py -c CONF -d DATA -m MODEL - -Arguments: - CONF train conf - DATA MNIST Data - MODEL Model - -Options: - -h --help - -c conf - -d data - -m model -""" - -import os -import sys -from docopt import docopt -import numpy as np - -from py_paddle import swig_paddle, DataProviderConverter -from paddle.trainer.PyDataProvider2 import dense_vector -from paddle.trainer.config_parser import parse_config - -from load_data import read_data - - -class Prediction(): - def __init__(self, train_conf, data_dir, model_dir): - - conf = parse_config(train_conf, 'is_predict=1') - self.network = swig_paddle.GradientMachine.createFromConfigProto( - conf.model_config) - self.network.loadParameters(model_dir) - - self.images, self.labels = read_data(data_dir, "t10k") - self.images = self.images / 255.0 * 2.0 - 1.0 # normalized to [-1,1] - - slots = [dense_vector(28 * 28)] - self.converter = DataProviderConverter(slots) - - def predict(self, index): - input = self.converter([[self.images[index].flatten().tolist()]]) - output = self.network.forwardTest(input) - prob = output[0]["value"] - predict = np.argsort(-prob) - print "Predicted probability of each digit:" - print prob - print "Predict Number: %d" % predict[0][0] - print "Actual Number: %d" % self.labels[index] - - -def main(): - arguments = docopt(__doc__) - train_conf = arguments['CONF'] - data_dir = arguments['DATA'] - model_dir = arguments['MODEL'] - swig_paddle.initPaddle("--use_gpu=0") - predictor = Prediction(train_conf, data_dir, model_dir) - while True: - index = int(raw_input("Input image_id [0~9999]: ")) - predictor.predict(index) - - -if __name__ == '__main__': - main() diff --git a/recognize_digits/train.sh b/recognize_digits/train.sh deleted file mode 100755 index dfe59b746e10c41b24e9a431cfa092e3dad31b4f..0000000000000000000000000000000000000000 --- a/recognize_digits/train.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -set -e - -config=mnist_model.py -output=./softmax_mnist_model -log=softmax_train.log - - - -paddle train \ ---config=$config \ ---dot_period=10 \ ---log_period=100 \ ---test_all_data_in_one_period=1 \ ---use_gpu=0 \ ---trainer_count=1 \ ---num_passes=100 \ ---save_dir=$output \ -2>&1 | tee $log - -python -m paddle.utils.plotcurve -i $log > plot.png