From a6f284ca49e1310fcbbcd67d7d69bdf763cda1c9 Mon Sep 17 00:00:00 2001 From: Jianwei Xie Date: Wed, 31 May 2017 10:39:44 -0700 Subject: [PATCH] Adds integration tests for LinearRegressor. PiperOrigin-RevId: 157604107 --- tensorflow/python/estimator/BUILD | 1 + .../python/estimator/canned/linear_test.py | 181 ++++++++++++++++-- 2 files changed, 164 insertions(+), 18 deletions(-) diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD index c16ab6df8f3..59d1e5693bc 100644 --- a/tensorflow/python/estimator/BUILD +++ b/tensorflow/python/estimator/BUILD @@ -378,6 +378,7 @@ py_test( ":linear", ":metric_keys", ":numpy_io", + ":pandas_io", ":run_config", "//tensorflow/python:check_ops", "//tensorflow/python:client", diff --git a/tensorflow/python/estimator/canned/linear_test.py b/tensorflow/python/estimator/canned/linear_test.py index a5abf367431..7fda9f0e540 100644 --- a/tensorflow/python/estimator/canned/linear_test.py +++ b/tensorflow/python/estimator/canned/linear_test.py @@ -25,6 +25,8 @@ import tempfile import numpy as np import six +from tensorflow.core.example import example_pb2 +from tensorflow.core.example import feature_pb2 from tensorflow.python.client import session as tf_session from tensorflow.python.estimator import estimator from tensorflow.python.estimator import run_config @@ -32,23 +34,39 @@ from tensorflow.python.estimator.canned import linear from tensorflow.python.estimator.canned import metric_keys from tensorflow.python.estimator.export import export from tensorflow.python.estimator.inputs import numpy_io +from tensorflow.python.estimator.inputs import pandas_io from tensorflow.python.feature_column import feature_column as feature_column_lib from tensorflow.python.framework import dtypes from tensorflow.python.framework import ops from tensorflow.python.framework import sparse_tensor from tensorflow.python.ops import check_ops +from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import math_ops +from tensorflow.python.ops import parsing_ops from tensorflow.python.ops import state_ops from tensorflow.python.ops import variable_scope from tensorflow.python.ops import variables from tensorflow.python.platform import gfile from tensorflow.python.platform import test from tensorflow.python.training import checkpoint_utils +from tensorflow.python.training import input as input_lib from tensorflow.python.training import optimizer +from tensorflow.python.training import queue_runner from tensorflow.python.training import saver from tensorflow.python.training import session_run_hook +try: + # pylint: disable=g-import-not-at-top + import pandas as pd + HAS_PANDAS = True +except IOError: + # Pandas writes a temporary file during import. If it fails, don't use pandas. + HAS_PANDAS = False +except ImportError: + HAS_PANDAS = False + + # Names of variables created by model. _AGE_WEIGHT_NAME = 'linear/linear_model/age/weights' _HEIGHT_WEIGHT_NAME = 'linear/linear_model/height/weights' @@ -63,6 +81,22 @@ def _save_variables_to_ckpt(model_dir): saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt')) +def _queue_parsed_features(feature_map): + tensors_to_enqueue = [] + keys = [] + for key, tensor in six.iteritems(feature_map): + keys.append(key) + tensors_to_enqueue.append(tensor) + queue_dtypes = [x.dtype for x in tensors_to_enqueue] + input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes) + queue_runner.add_queue_runner( + queue_runner.QueueRunner( + input_queue, + [input_queue.enqueue(tensors_to_enqueue)])) + dequeued_tensors = input_queue.dequeue() + return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))} + + class _CheckPartitionerVarHook(session_run_hook.SessionRunHook): """A `SessionRunHook` to check a paritioned variable.""" @@ -430,40 +464,29 @@ class LinearRegressorIntegrationTest(test.TestCase): if self._model_dir: shutil.rmtree(self._model_dir) - def test_complete_flow(self): - label_dimension = 2 - batch_size = 10 + def _test_complete_flow( + self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension, + label_dimension, prediction_length, batch_size): feature_columns = [ - feature_column_lib.numeric_column('x', shape=(2,)) + feature_column_lib.numeric_column('x', shape=(input_dimension,)) ] est = linear.LinearRegressor( feature_columns=feature_columns, label_dimension=label_dimension, model_dir=self._model_dir) - data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) - data = data.reshape(batch_size, label_dimension) # TRAIN # learn y = x - train_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, - shuffle=True) est.train(train_input_fn, steps=200) # EVALUTE - eval_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, y=data, batch_size=batch_size, num_epochs=1, - shuffle=False) scores = est.evaluate(eval_input_fn) self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP]) self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores)) # PREDICT - predict_input_fn = numpy_io.numpy_input_fn( - x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, - shuffle=False) - predictions = list( - [x['predictions'] for x in est.predict(predict_input_fn)]) - self.assertAllClose(data, predictions, atol=0.01) + predictions = np.array([ + x['predictions'] for x in est.predict(predict_input_fn)]) + self.assertAllEqual((prediction_length, label_dimension), predictions.shape) # EXPORT feature_spec = feature_column_lib.make_parse_example_spec( @@ -474,6 +497,128 @@ class LinearRegressorIntegrationTest(test.TestCase): serving_input_receiver_fn) self.assertTrue(gfile.Exists(export_dir)) + def test_numpy_input_fn(self): + """Tests complete flow with numpy_input_fn.""" + label_dimension = 2 + input_dimension = label_dimension + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) + data = data.reshape(batch_size, label_dimension) + + train_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, y=data, batch_size=batch_size, num_epochs=None, + shuffle=True) + eval_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, y=data, batch_size=batch_size, num_epochs=1, + shuffle=False) + predict_input_fn = numpy_io.numpy_input_fn( + x={'x': data}, y=None, batch_size=batch_size, num_epochs=1, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length, + batch_size=batch_size) + + def test_pandas_input_fn(self): + """Tests complete flow with pandas_input_fn.""" + if not HAS_PANDAS: + return + + # Pandas DataFrame natually supports 1 dim data only. + label_dimension = 1 + input_dimension = label_dimension + batch_size = 10 + data = np.array([1., 2., 3., 4.], dtype=np.float32) + x = pd.DataFrame({'x': data}) + y = pd.Series(data) + prediction_length = 4 + + train_input_fn = pandas_io.pandas_input_fn( + x=x, + y=y, + batch_size=batch_size, + num_epochs=None, + shuffle=True) + eval_input_fn = pandas_io.pandas_input_fn( + x=x, + y=y, + batch_size=batch_size, + shuffle=False) + predict_input_fn = pandas_io.pandas_input_fn( + x=x, + batch_size=batch_size, + shuffle=False) + + self._test_complete_flow( + train_input_fn=train_input_fn, + eval_input_fn=eval_input_fn, + predict_input_fn=predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length, + batch_size=batch_size) + + def test_input_fn_from_parse_example(self): + """Tests complete flow with input_fn constructed from parse_example.""" + label_dimension = 2 + input_dimension = label_dimension + batch_size = 10 + prediction_length = batch_size + data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32) + data = data.reshape(batch_size, label_dimension) + + serialized_examples = [] + for datum in data: + example = example_pb2.Example(features=feature_pb2.Features( + feature={ + 'x': feature_pb2.Feature( + float_list=feature_pb2.FloatList(value=datum)), + 'y': feature_pb2.Feature( + float_list=feature_pb2.FloatList( + value=datum[:label_dimension])), + })) + serialized_examples.append(example.SerializeToString()) + + feature_spec = { + 'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32), + 'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32), + } + + def _train_input_fn(): + feature_map = parsing_ops.parse_example(serialized_examples, feature_spec) + features = _queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + def _eval_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = _queue_parsed_features(feature_map) + labels = features.pop('y') + return features, labels + def _predict_input_fn(): + feature_map = parsing_ops.parse_example( + input_lib.limit_epochs(serialized_examples, num_epochs=1), + feature_spec) + features = _queue_parsed_features(feature_map) + features.pop('y') + return features, None + + self._test_complete_flow( + train_input_fn=_train_input_fn, + eval_input_fn=_eval_input_fn, + predict_input_fn=_predict_input_fn, + input_dimension=input_dimension, + label_dimension=label_dimension, + prediction_length=prediction_length, + batch_size=batch_size) + def _assert_close(expected, actual, rtol=1e-04, name='assert_close'): with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope: -- GitLab