From 554cea683957adae683d3b7dd23c062b45ab1c34 Mon Sep 17 00:00:00 2001 From: tangwei12 Date: Wed, 11 Apr 2018 20:40:46 +0800 Subject: [PATCH] move most codes to uci_housing #9660 --- doc/fluid/getstarted/quickstart_cn.rst | 99 ++++----------------- doc/fluid/getstarted/quickstart_en.rst | 116 ++++++------------------- python/paddle/dataset/uci_housing.py | 36 +++++++- 3 files changed, 80 insertions(+), 171 deletions(-) diff --git a/doc/fluid/getstarted/quickstart_cn.rst b/doc/fluid/getstarted/quickstart_cn.rst index 102ce803f21..5644911b007 100644 --- a/doc/fluid/getstarted/quickstart_cn.rst +++ b/doc/fluid/getstarted/quickstart_cn.rst @@ -25,94 +25,31 @@ PaddlePaddle支持使用pip快速安装,目前支持CentOS 6以上, Ubuntu 14. 创建一个 housing.py 并粘贴此Python代码: .. code-block:: python - - import sys - - import math - import numpy - - import paddle.fluid as fluid - import paddle.fluid.core as core import paddle + import paddle.fluid as fluid - def train(save_dirname): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') - - cost = fluid.layers.square_error_cost(input=y_predict, label=y) - avg_cost = fluid.layers.mean(cost) - - sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) - optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) - - BATCH_SIZE = 20 - - train_reader = paddle.batch( - paddle.reader.shuffle(paddle.dataset.uci_housing.train(), buf_size=500), batch_size=BATCH_SIZE) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) - exe.run(fluid.default_startup_program()) - - main_program = fluid.default_main_program() - PASS_NUM = 100 - for pass_id in range(PASS_NUM): - for data in train_reader(): - avg_loss_value, = exe.run(main_program, - feed=feeder.feed(data), - fetch_list=[avg_cost]) - if avg_loss_value[0] < 10.0: - if save_dirname is not None: - 
fluid.io.save_inference_model(save_dirname, ['x'], - [y_predict], exe) - return - if math.isnan(float(avg_loss_value)): - sys.exit("got NaN loss, training failed.") - raise AssertionError("Fit a line cost is too large, {0:2.2}".format( - avg_loss_value[0])) + x = fluid.layers.data(name='x', shape=[13], dtype='float32') + place = fluid.CPUPlace() + exe = fluid.Executor(place=place) + feeder = fluid.DataFeeder(place=place, feed_list=[x]) - def infer(save_dirname): - place = fluid.CPUPlace() - exe = fluid.Executor(place) + with fluid.scope_guard(fluid.core.Scope()): + parameter_model = paddle.dataset.uci_housing.fluid_model() - probs = [] + [inference_program, feed_target_names,fetch_targets] = \ + fluid.io.load_inference_model(parameter_model, exe) - inference_scope = fluid.core.Scope() - with fluid.scope_guard(inference_scope): - # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded - # data using feed operators), and the fetch_targets (variables that - # we want to obtain data from using fetch operators). 
- [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) + predict_reader = paddle.batch(paddle.dataset.uci_housing.predict_reader(), batch_size=20) - # The input's dimension should be 2-D and the second dim is 13 - # The input data should be >= 0 - batch_size = 10 - tensor_x = numpy.random.uniform(0, 10, - [batch_size, 13]).astype("float32") - assert feed_target_names[0] == 'x' - results = exe.run(inference_program, - feed={feed_target_names[0]: tensor_x}, + results = [] + for data in predict_reader(): + result = exe.run(inference_program, + feed=feeder.feed(data), fetch_list=fetch_targets) - probs.append(results) + results.append(result) - for i in xrange(len(probs)): - print(probs[i][0] * 1000) - print('Predicted price: ${0}'.format(probs[i][0] * 1000)) - - def main(): - # Directory for saving the trained model - save_dirname = "fit_a_line.inference.model" - - train(save_dirname) - infer(save_dirname) - - if __name__=="__main__": - main() - + for res in results: + for i in xrange(len(res[0])): + print 'Predicted price: ${:,.2f}'.format(res[0][i][0] * 1000) 执行 :code:`python housing.py` 瞧! 它应该打印出预测住房数据的清单。 diff --git a/doc/fluid/getstarted/quickstart_en.rst b/doc/fluid/getstarted/quickstart_en.rst index a5b9e977c34..bb751b91d01 100644 --- a/doc/fluid/getstarted/quickstart_en.rst +++ b/doc/fluid/getstarted/quickstart_en.rst @@ -28,93 +28,33 @@ code: .. 
code-block:: python - import sys - - import math - import numpy - - import paddle.fluid as fluid - import paddle.fluid.core as core - import paddle - - def train(save_dirname): - x = fluid.layers.data(name='x', shape=[13], dtype='float32') - y_predict = fluid.layers.fc(input=x, size=1, act=None) - y = fluid.layers.data(name='y', shape=[1], dtype='float32') - - cost = fluid.layers.square_error_cost(input=y_predict, label=y) - avg_cost = fluid.layers.mean(cost) - - sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001) - optimize_ops, params_grads = sgd_optimizer.minimize(avg_cost) - - BATCH_SIZE = 20 - - train_reader = paddle.batch( - paddle.reader.shuffle(paddle.dataset.uci_housing.train(), buf_size=500), batch_size=BATCH_SIZE) - - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) - exe.run(fluid.default_startup_program()) - - main_program = fluid.default_main_program() - - PASS_NUM = 100 - for pass_id in range(PASS_NUM): - for data in train_reader(): - avg_loss_value, = exe.run(main_program, - feed=feeder.feed(data), - fetch_list=[avg_cost]) - if avg_loss_value[0] < 10.0: - if save_dirname is not None: - fluid.io.save_inference_model(save_dirname, ['x'], - [y_predict], exe) - return - if math.isnan(float(avg_loss_value)): - sys.exit("got NaN loss, training failed.") - raise AssertionError("Fit a line cost is too large, {0:2.2}".format( - avg_loss_value[0])) - - def infer(save_dirname): - place = fluid.CPUPlace() - exe = fluid.Executor(place) - - probs = [] - - inference_scope = fluid.core.Scope() - with fluid.scope_guard(inference_scope): - # Use fluid.io.load_inference_model to obtain the inference program desc, - # the feed_target_names (the names of variables that will be feeded - # data using feed operators), and the fetch_targets (variables that - # we want to obtain data from using fetch operators). 
- [inference_program, feed_target_names, - fetch_targets] = fluid.io.load_inference_model(save_dirname, exe) - - # The input's dimension should be 2-D and the second dim is 13 - # The input data should be >= 0 - batch_size = 10 - tensor_x = numpy.random.uniform(0, 10, - [batch_size, 13]).astype("float32") - assert feed_target_names[0] == 'x' - results = exe.run(inference_program, - feed={feed_target_names[0]: tensor_x}, - fetch_list=fetch_targets) - probs.append(results) - - for i in xrange(len(probs)): - print(probs[i][0] * 1000) - print('Predicted price: ${0}'.format(probs[i][0] * 1000)) - - def main(): - # Directory for saving the trained model - save_dirname = "fit_a_line.inference.model" - - train(save_dirname) - infer(save_dirname) - - if __name__=="__main__": - main() + import paddle + import paddle.fluid as fluid + + + x = fluid.layers.data(name='x', shape=[13], dtype='float32') + place = fluid.CPUPlace() + exe = fluid.Executor(place=place) + feeder = fluid.DataFeeder(place=place, feed_list=[x]) + + with fluid.scope_guard(fluid.core.Scope()): + parameter_model = paddle.dataset.uci_housing.fluid_model() + + [inference_program, feed_target_names,fetch_targets] = \ + fluid.io.load_inference_model(parameter_model, exe) + + predict_reader = paddle.batch(paddle.dataset.uci_housing.predict_reader(), batch_size=20) + + results = [] + for data in predict_reader(): + result = exe.run(inference_program, + feed=feeder.feed(data), + fetch_list=fetch_targets) + results.append(result) + + for res in results: + for i in xrange(len(res[0])): + print 'Predicted price: ${:,.2f}'.format(res[0][i][0] * 1000) + Run :code:`python housing.py` and voila! It should print out a list of predictions for the test housing data. 
diff --git a/python/paddle/dataset/uci_housing.py b/python/paddle/dataset/uci_housing.py index 6a56e9d5563..8da08249b52 100644 --- a/python/paddle/dataset/uci_housing.py +++ b/python/paddle/dataset/uci_housing.py @@ -19,7 +19,11 @@ https://archive.ics.uci.edu/ml/machine-learning-databases/housing/ and parse training set and test set into paddle reader creators. """ +import os + import numpy as np +import tempfile +import tarfile import os import paddle.dataset.common @@ -34,8 +38,9 @@ feature_names = [ UCI_TRAIN_DATA = None UCI_TEST_DATA = None -URL_MODEL = 'https://github.com/PaddlePaddle/book/raw/develop/01.fit_a_line/fit_a_line.tar' -MD5_MODEL = '52fc3da8ef3937822fcdd87ee05c0c9b' + +FLUID_URL_MODEL = 'https://github.com/PaddlePaddle/book/raw/develop/01.fit_a_line/fluid/fit_a_line.fluid.tar' +FLUID_MD5_MODEL = '6e6dd637ccd5993961f68bfbde46090b' def feature_range(maximums, minimums): @@ -112,6 +117,33 @@ def test(): return reader +def fluid_model(): + parameter_tar = paddle.dataset.common.download(FLUID_URL_MODEL, 'uci_housing', FLUID_MD5_MODEL, 'fit_a_line.fluid.tar') + + tar = tarfile.TarFile(parameter_tar, mode='r') + dirpath = tempfile.mkdtemp() + tar.extractall(path=dirpath) + + return dirpath + +def predict_reader(): + """ + UCI_HOUSING test set creator. + + It returns a reader creator, each sample in the reader is a 1-tuple holding + the features after normalization, without the price number. + + :return: Test reader creator + :rtype: callable + """ + global UCI_TEST_DATA + load_data(paddle.dataset.common.download(URL, 'uci_housing', MD5)) + + def reader(): + for d in UCI_TEST_DATA: + yield (d[:-1],) + + return reader def fetch(): paddle.dataset.common.download(URL, 'uci_housing', MD5) -- GitLab