提交 45d1e497 编写于 作者: D dayhaha

initial complete for recognize_digits

上级 4672708e
此差异已折叠。
#!/usr/bin/env sh
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This scripts downloads the mnist data and unzips it.
set -e
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
rm -rf "$DIR/raw_data"
mkdir "$DIR/raw_data"
cd "$DIR/raw_data"
echo "Downloading..."
for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
do
if [ ! -e $fname ]; then
wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
gunzip ${fname}.gz
fi
done
cd $DIR
rm -f *.list
echo "./data/raw_data/train\n" > "$DIR/train.list"
echo "./data/raw_data/t10k\n" > "$DIR/test.list"
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
data_dir = './data/'
define_py_data_sources2(
train_list=data_dir + 'train.list',
test_list=data_dir + 'test.list',
module='mnist_provider',
obj='process')
######################Algorithm Configuration #############
settings(
batch_size=128,
learning_rate=0.1 / 128.0,
learning_method=MomentumOptimizer(0.9),
regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
data_size = 1 * 28 * 28
label_size = 10
img = data_layer(name='pixel', size=data_size)
# first conv layer
conv_pool_1 = simple_img_conv_pool(
input=img,
filter_size=5,
num_filters=20,
num_channel=1,
pool_size=2,
pool_stride=2,
act=TanhActivation())
# second conv layer
conv_pool_2 = simple_img_conv_pool(
input=conv_pool_1,
filter_size=5,
num_filters=50,
num_channel=20,
pool_size=2,
pool_stride=2,
act=TanhActivation())
# The first fully-connected layer
fc1 = fc_layer(input=conv_pool_2, size=128, act=TanhActivation())
# The softmax layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = fc_layer(input=fc1, size=10, act=SoftmaxActivation())
if not is_predict:
lbl = data_layer(name="label", size=label_size)
inputs(img, lbl)
outputs(classification_cost(input=predict, label=lbl))
else:
outputs(predict)
#!/usr/bin/python
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re
import math
def get_best_pass(filename):
with open(filename, 'r') as f:
text = f.read()
pattern = re.compile(
'Test.*? cost=([0-9]+\.[0-9]+).*?classification_error_evaluator=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
re.S)
results = re.findall(pattern, text)
sorted_results = sorted(results, key=lambda result: float(result[0]))
return sorted_results[0]
filename = sys.argv[1]
log = get_best_pass(filename)
predict_error = math.sqrt(float(log[0])) / 2
classification_accuracy = (1 - float(log[1])) * 100
print 'Best pass is %s, error is %s, which means predict get error as %f' % (
log[2], log[0], predict_error)
print 'The classification accuracy is %.2f%%' % classification_accuracy
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import matplotlib.pyplot as plt
import random
def read_data(path, filename):
imgf = path + filename + "-images-idx3-ubyte"
labelf = path + filename + "-labels-idx1-ubyte"
f = open(imgf, "rb")
l = open(labelf, "rb")
f.read(16)
l.read(8)
# Define number of samples for train/test
n = 60000 if "train" in filename else 10000
rows = 28
cols = 28
images = np.fromfile(
f, 'ubyte',
count=n * rows * cols).reshape(n, rows, cols).astype('float32')
labels = np.fromfile(l, 'ubyte', count=n).astype("int")
return images, labels
if __name__ == "__main__":
train_images, train_labels = read_data("./raw_data/", "train")
test_images, test_labels = read_data("./raw_data/", "t10k")
label_list = []
for i in range(10):
index = random.randint(0, train_images.shape[0] - 1)
label_list.append(train_labels[index])
plt.subplot(1, 10, i + 1)
plt.imshow(train_images[index], cmap="Greys_r")
plt.axis('off')
print('label: %s' % (label_list, ))
plt.show()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
data_dir = './data/'
define_py_data_sources2(
train_list=data_dir + 'train.list',
test_list=data_dir + 'test.list',
module='mnist_provider',
obj='process')
######################Algorithm Configuration #############
settings(
batch_size=128,
learning_rate=0.1 / 128.0,
learning_method=MomentumOptimizer(0.9),
regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
data_size = 1 * 28 * 28
label_size = 10
img = data_layer(name='pixel', size=data_size)
# The first fully-connected layer
hidden1 = fc_layer(input=img, size=128, act=ReluActivation())
# The second fully-connected layer and the according activation function
hidden2 = fc_layer(input=hidden1, size=64, act=ReluActivation())
# The thrid fully-connected layer, note that the hidden size should be 10,
# which is the number of unique digits
predict = fc_layer(input=hidden2, size=10, act=SoftmaxActivation())
if not is_predict:
lbl = data_layer(name="label", size=label_size)
inputs(img, lbl)
outputs(classification_cost(input=predict, label=lbl))
else:
outputs(predict)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer.PyDataProvider2 import *
import numpy
# Define a py data provider
@provider(
input_types={'pixel': dense_vector(28 * 28),
'label': integer_value(10)},
cache=CacheType.CACHE_PASS_IN_MEM)
def process(settings, filename): # settings is not used currently.
imgf = filename + "-images-idx3-ubyte"
labelf = filename + "-labels-idx1-ubyte"
f = open(imgf, "rb")
l = open(labelf, "rb")
f.read(16)
l.read(8)
# Define number of samples for train/test
if "train" in filename:
n = 60000
else:
n = 10000
images = numpy.fromfile(
f, 'ubyte', count=n * 28 * 28).reshape((n, 28 * 28)).astype('float32')
images = images / 255.0 * 2.0 - 1.0
labels = numpy.fromfile(l, 'ubyte', count=n).astype("int")
for i in xrange(n):
yield {"pixel": images[i, :], 'label': labels[i]}
f.close()
l.close()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import matplotlib.pyplot as plt
import re
import sys
def plot_log(filename):
with open(filename, 'r') as f:
text = f.read()
pattern = re.compile('Test.*? cost=([0-9]+\.[0-9]+).*?pass-([0-9]+)',
re.S)
results = re.findall(pattern, text)
cost, pass_ = zip(*results)
cost_float = map(float, cost)
pass_int = map(int, pass_)
plt.plot(pass_int, cost_float, 'bo', pass_, cost_float, 'k')
plt.ylabel('AvgCost')
plt.xlabel('epoch')
plt.show()
if __name__ == '__main__':
plot_log(sys.argv[1])
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Usage: predict.py -c CONF -d DATA -m MODEL
Arguments:
CONF train conf
DATA MNIST Data
MODEL Model
Options:
-h --help
-c conf
-d data
-m model
"""
import os
import sys
from docopt import docopt
import matplotlib.pyplot as plt
import numpy as np
from py_paddle import swig_paddle, DataProviderConverter
from paddle.trainer.PyDataProvider2 import dense_vector
from paddle.trainer.config_parser import parse_config
from load_data import read_data
class Prediction():
def __init__(self, train_conf, data_dir, model_dir):
conf = parse_config(train_conf, 'is_predict=1')
self.network = swig_paddle.GradientMachine.createFromConfigProto(
conf.model_config)
self.network.loadParameters(model_dir)
self.images, self.labels = read_data(data_dir, "t10k")
slots = [dense_vector(28 * 28)]
self.converter = DataProviderConverter(slots)
def predict(self, index):
input = self.converter([[self.images[index].flatten().tolist()]])
output = self.network.forwardTest(input)
prob = output[0]["value"]
predict = np.argsort(-prob)
print prob
print predict[0][0], self.labels[index]
def main():
arguments = docopt(__doc__)
train_conf = arguments['CONF']
data_dir = arguments['DATA']
model_dir = arguments['MODEL']
swig_paddle.initPaddle("--use_gpu=0")
predictor = Prediction(train_conf, data_dir, model_dir)
while True:
index = int(raw_input("Input image_id [0~9999]: "))
predictor.predict(index)
if __name__ == '__main__':
main()
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
is_predict = get_config_arg("is_predict", bool, False)
####################Data Configuration ##################
if not is_predict:
data_dir = './data/'
define_py_data_sources2(
train_list=data_dir + 'train.list',
test_list=data_dir + 'test.list',
module='mnist_provider',
obj='process')
######################Algorithm Configuration #############
settings(
batch_size=128,
learning_rate=0.1 / 128.0,
learning_method=MomentumOptimizer(0.9),
regularization=L2Regularization(0.0005 * 128))
#######################Network Configuration #############
data_size = 1 * 28 * 28
label_size = 10
img = data_layer(name='pixel', size=data_size)
# mlp is used
predict = fc_layer(input=img, size=10, act=SoftmaxActivation())
if not is_predict:
lbl = data_layer(name="label", size=label_size)
inputs(img, lbl)
outputs(classification_cost(input=predict, label=lbl))
else:
outputs(predict)
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
config=cnn_mnist.py
output=./cnn_mnist_model
log=cnn_train.log
paddle train \
--config=$config \
--dot_period=10 \
--log_period=100 \
--test_all_data_in_one_period=1 \
--use_gpu=0 \
--trainer_count=1 \
--num_passes=100 \
--save_dir=$output \
2>&1 | tee $log
python -m paddle.utils.plotcurve -i $log > plot.png
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册