From a6f284ca49e1310fcbbcd67d7d69bdf763cda1c9 Mon Sep 17 00:00:00 2001
From: Jianwei Xie <xiejw@google.com>
Date: Wed, 31 May 2017 10:39:44 -0700
Subject: [PATCH] Adds integration tests for LinearRegressor.

PiperOrigin-RevId: 157604107
---
 tensorflow/python/estimator/BUILD             |   1 +
 .../python/estimator/canned/linear_test.py    | 181 ++++++++++++++++--
 2 files changed, 164 insertions(+), 18 deletions(-)

diff --git a/tensorflow/python/estimator/BUILD b/tensorflow/python/estimator/BUILD
index c16ab6df8f3..59d1e5693bc 100644
--- a/tensorflow/python/estimator/BUILD
+++ b/tensorflow/python/estimator/BUILD
@@ -378,6 +378,7 @@ py_test(
         ":linear",
         ":metric_keys",
         ":numpy_io",
+        ":pandas_io",
         ":run_config",
         "//tensorflow/python:check_ops",
         "//tensorflow/python:client",
diff --git a/tensorflow/python/estimator/canned/linear_test.py b/tensorflow/python/estimator/canned/linear_test.py
index a5abf367431..7fda9f0e540 100644
--- a/tensorflow/python/estimator/canned/linear_test.py
+++ b/tensorflow/python/estimator/canned/linear_test.py
@@ -25,6 +25,8 @@ import tempfile
 import numpy as np
 import six
 
+from tensorflow.core.example import example_pb2
+from tensorflow.core.example import feature_pb2
 from tensorflow.python.client import session as tf_session
 from tensorflow.python.estimator import estimator
 from tensorflow.python.estimator import run_config
@@ -32,23 +34,39 @@ from tensorflow.python.estimator.canned import linear
 from tensorflow.python.estimator.canned import metric_keys
 from tensorflow.python.estimator.export import export
 from tensorflow.python.estimator.inputs import numpy_io
+from tensorflow.python.estimator.inputs import pandas_io
 from tensorflow.python.feature_column import feature_column as feature_column_lib
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import check_ops
+from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import parsing_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import test
 from tensorflow.python.training import checkpoint_utils
+from tensorflow.python.training import input as input_lib
 from tensorflow.python.training import optimizer
+from tensorflow.python.training import queue_runner
 from tensorflow.python.training import saver
 from tensorflow.python.training import session_run_hook
 
 
+try:
+  # pylint: disable=g-import-not-at-top
+  import pandas as pd
+  HAS_PANDAS = True
+except IOError:
+  # Pandas writes a temporary file during import. If it fails, don't use pandas.
+  HAS_PANDAS = False
+except ImportError:
+  HAS_PANDAS = False
+
+
 # Names of variables created by model.
 _AGE_WEIGHT_NAME = 'linear/linear_model/age/weights'
 _HEIGHT_WEIGHT_NAME = 'linear/linear_model/height/weights'
@@ -63,6 +81,22 @@ def _save_variables_to_ckpt(model_dir):
     saver.Saver().save(sess, os.path.join(model_dir, 'model.ckpt'))
 
 
+def _queue_parsed_features(feature_map):
+  tensors_to_enqueue = []
+  keys = []
+  for key, tensor in six.iteritems(feature_map):
+    keys.append(key)
+    tensors_to_enqueue.append(tensor)
+  queue_dtypes = [x.dtype for x in tensors_to_enqueue]
+  input_queue = data_flow_ops.FIFOQueue(capacity=100, dtypes=queue_dtypes)
+  queue_runner.add_queue_runner(
+      queue_runner.QueueRunner(
+          input_queue,
+          [input_queue.enqueue(tensors_to_enqueue)]))
+  dequeued_tensors = input_queue.dequeue()
+  return {keys[i]: dequeued_tensors[i] for i in range(len(dequeued_tensors))}
+
+
 class _CheckPartitionerVarHook(session_run_hook.SessionRunHook):
   """A `SessionRunHook` to check a paritioned variable."""
 
@@ -430,40 +464,29 @@ class LinearRegressorIntegrationTest(test.TestCase):
     if self._model_dir:
       shutil.rmtree(self._model_dir)
 
-  def test_complete_flow(self):
-    label_dimension = 2
-    batch_size = 10
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      label_dimension, prediction_length, batch_size):
     feature_columns = [
-        feature_column_lib.numeric_column('x', shape=(2,))
+        feature_column_lib.numeric_column('x', shape=(input_dimension,))
     ]
     est = linear.LinearRegressor(
         feature_columns=feature_columns, label_dimension=label_dimension,
         model_dir=self._model_dir)
-    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
-    data = data.reshape(batch_size, label_dimension)
 
     # TRAIN
     # learn y = x
-    train_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, y=data, batch_size=batch_size, num_epochs=None,
-        shuffle=True)
     est.train(train_input_fn, steps=200)
 
     # EVALUTE
-    eval_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, y=data, batch_size=batch_size, num_epochs=1,
-        shuffle=False)
     scores = est.evaluate(eval_input_fn)
     self.assertEqual(200, scores[ops.GraphKeys.GLOBAL_STEP])
     self.assertIn(metric_keys.MetricKeys.LOSS, six.iterkeys(scores))
 
     # PREDICT
-    predict_input_fn = numpy_io.numpy_input_fn(
-        x={'x': data}, y=None, batch_size=batch_size, num_epochs=1,
-        shuffle=False)
-    predictions = list(
-        [x['predictions'] for x in est.predict(predict_input_fn)])
-    self.assertAllClose(data, predictions, atol=0.01)
+    predictions = np.array([
+        x['predictions'] for x in est.predict(predict_input_fn)])
+    self.assertAllEqual((prediction_length, label_dimension), predictions.shape)
 
     # EXPORT
     feature_spec = feature_column_lib.make_parse_example_spec(
@@ -474,6 +497,128 @@ class LinearRegressorIntegrationTest(test.TestCase):
                                        serving_input_receiver_fn)
     self.assertTrue(gfile.Exists(export_dir))
 
+  def test_numpy_input_fn(self):
+    """Tests complete flow with numpy_input_fn."""
+    label_dimension = 2
+    input_dimension = label_dimension
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, y=data, batch_size=batch_size, num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, y=data, batch_size=batch_size, num_epochs=1,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': data}, y=None, batch_size=batch_size, num_epochs=1,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length,
+        batch_size=batch_size)
+
+  def test_pandas_input_fn(self):
+    """Tests complete flow with pandas_input_fn."""
+    if not HAS_PANDAS:
+      return
+
+    # Pandas DataFrame natually supports 1 dim data only.
+    label_dimension = 1
+    input_dimension = label_dimension
+    batch_size = 10
+    data = np.array([1., 2., 3., 4.], dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(data)
+    prediction_length = 4
+
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length,
+        batch_size=batch_size)
+
+  def test_input_fn_from_parse_example(self):
+    """Tests complete flow with input_fn constructed from parse_example."""
+    label_dimension = 2
+    input_dimension = label_dimension
+    batch_size = 10
+    prediction_length = batch_size
+    data = np.linspace(0., 2., batch_size * label_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, label_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x': feature_pb2.Feature(
+                  float_list=feature_pb2.FloatList(value=datum)),
+              'y': feature_pb2.Feature(
+                  float_list=feature_pb2.FloatList(
+                      value=datum[:label_dimension])),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([label_dimension], dtypes.float32),
+    }
+
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        label_dimension=label_dimension,
+        prediction_length=prediction_length,
+        batch_size=batch_size)
+
 
 def _assert_close(expected, actual, rtol=1e-04, name='assert_close'):
   with ops.name_scope(name, 'assert_close', (expected, actual, rtol)) as scope:
-- 
GitLab