Commit f0036364 authored by Eugene Brevdo, committed by TensorFlower Gardener

Sparse cross entropy loss now raises an error on invalid labels when run on CPU.

On GPU, it's hard to get error metrics back, so we continue to return NaNs.
Change: 133875596
Parent af780b8c
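In user-facing terms, the change looks like this (a minimal sketch against the TF 0.x-era positional Python API; the exact error text comes from the new kernel check in the diff below):

```python
import tensorflow as tf

logits = [[1., 2., 3., 4.],
          [1., 2., 3., 4.]]
labels = [0, 9]  # 9 is outside [0, 4), the valid class range

loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
with tf.Session() as sess:
  # CPU: now raises InvalidArgumentError ("Received a label value of 9 ...").
  # GPU: no error is reported; the second loss row comes back as NaN.
  sess.run(loss)
```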
@@ -17,17 +17,36 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/framework/op_kernel.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/kernels/sparse_xent_op.h"
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
 
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
+template <typename Index>
+Status CheckInvalidLabelIndex(const Tensor& labels, int64 max_index) {
+  if (labels.NumElements() == 0) return Status::OK();
+  const auto label_values = labels.vec<Index>();
+  int64 bad_index;
+  auto min_max_dim_value = std::minmax_element(
+      label_values.data(), label_values.data() + label_values.size());
+  if (*min_max_dim_value.first < 0 || *min_max_dim_value.second >= max_index) {
+    bad_index = (*min_max_dim_value.first < 0) ? *min_max_dim_value.first
+                                               : *min_max_dim_value.second;
+    return errors::InvalidArgument("Received a label value of ", bad_index,
+                                   " which is outside the valid range of [0, ",
+                                   max_index, "). Label values: ",
+                                   labels.SummarizeValue(labels.NumElements()));
+  }
+  return Status::OK();
+}
+
 template <typename Device, typename T, typename Index>
 class SparseSoftmaxXentWithLogitsOp : public OpKernel {
  public:
@@ -66,6 +85,10 @@ class SparseSoftmaxXentWithLogitsOp : public OpKernel {
                    context->allocate_output(1, logits.shape(), &back_out));
     if (logits.dim_size(0) > 0) {
+      if (std::is_same<Device, CPUDevice>::value) {
+        OP_REQUIRES_OK(
+            context, CheckInvalidLabelIndex<Index>(labels, logits.dim_size(1)));
+      }
       functor::SparseXentFunctor<Device, T, Index> functor;
       functor(context->eigen_device<Device>(), logits.matrix<T>(),
               labels.vec<Index>(), scratch.vec<T>(), loss_out->vec<T>(),
...
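For reference, the new `CheckInvalidLabelIndex` check above boils down to a single min/max scan over the label vector, rejecting anything outside `[0, num_classes)`. A NumPy sketch of the same logic (the function name is hypothetical, not part of the commit):

```python
import numpy as np

def check_invalid_label_index(labels, max_index):
  # Empty label vectors are trivially valid, as in the kernel.
  labels = np.asarray(labels)
  if labels.size == 0:
    return
  lo, hi = labels.min(), labels.max()  # mirrors std::minmax_element
  if lo < 0 or hi >= max_index:
    bad_index = lo if lo < 0 else hi
    raise ValueError("Received a label value of %d which is outside the "
                     "valid range of [0, %d). Label values: %s"
                     % (bad_index, max_index, labels))

check_invalid_label_index([4, 3, 0, -1], max_index=4)  # raises ValueError
```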
@@ -72,18 +72,28 @@ class SparseXentTest(tf.test.TestCase):
                 [1., 2., 3., 4.],
                 [1., 2., 3., 4.]]
     labels = [4, 3, 0, -1]
-    with self.test_session(use_gpu=True) as sess:
-      loss, backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
-          features, labels)
-      tf_loss, tf_backprop = sess.run([loss, backprop])
-      self.assertAllClose(
-          [[np.nan] * 4,
-           [0.25, 0.25, 0.25, -0.75],
-           [-0.968, 0.087, 0.237, 0.6439],
-           [np.nan] * 4],
-          tf_backprop, rtol=1e-3, atol=1e-3)
-      self.assertAllClose(
-          [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3)
+    if tf.test.is_built_with_cuda() and tf.test.is_gpu_available():
+      with self.test_session(use_gpu=True) as sess:
+        loss, backprop = (
+            gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
+                features, labels))
+        tf_loss, tf_backprop = sess.run([loss, backprop])
+        self.assertAllClose(
+            [[np.nan] * 4,
+             [0.25, 0.25, 0.25, -0.75],
+             [-0.968, 0.087, 0.237, 0.6439],
+             [np.nan] * 4],
+            tf_backprop, rtol=1e-3, atol=1e-3)
+        self.assertAllClose(
+            [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3)
+    with self.test_session(use_gpu=False) as sess:
+      loss, backprop = (
+          gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
+              features, labels))
+      with self.assertRaisesOpError("Received a label value of"):
+        sess.run([loss, backprop])
 
   def testNpXent(self):
     # We create 2 batches of logits for testing.
...
@@ -560,7 +560,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
 
 def softmax(logits, dim=-1, name=None):
-  """Computes softmax activations.
+  """Computes log softmax activations.
 
   For each batch `i` and class `j` we have
 
@@ -587,7 +587,7 @@ def log_softmax(logits, dim=-1, name=None):
 
   For each batch `i` and class `j` we have
 
-      logsoftmax = logits - log(reduce_sum(exp(logits), dim))
+      logsoftmax = logits - reduce_sum(exp(logits), dim)
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
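Numerically, log softmax is log(softmax(logits)), i.e. logits - log(reduce_sum(exp(logits), dim)); the identity does need the log term. A small NumPy check, with the conventional max-shift for stability:

```python
import numpy as np

logits = np.array([1., 2., 3., 4.])
# log softmax identity: logsoftmax = logits - log(sum(exp(logits)))
# Subtracting the max first keeps exp() from overflowing.
shifted = logits - logits.max()
log_softmax = shifted - np.log(np.exp(shifted).sum())
assert np.allclose(np.exp(log_softmax).sum(), 1.0)  # softmax sums to 1
```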
@@ -716,12 +716,14 @@ def sparse_softmax_cross_entropy_with_logits(logits, labels, name=None):
   labels of shape `[batch_size]`. But higher dimensions are supported.
 
   Args:
     logits: Unscaled log probabilities of rank `r` and shape
       `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
     labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
       `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
-      Other values will result in a loss of 0, but incorrect gradient
-      computations.
+      Other values will raise an exception when this op is run on CPU, and
+      return `NaN` for the corresponding loss and gradient rows on GPU.
     name: A name for the operation (optional).
 
   Returns:
...
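The rank-`r` shapes documented above generalize the common `[batch_size, num_classes]` case, e.g. to per-timestep sequence labeling. A hedged sketch against the era's positional API, with shapes taken from the docstring:

```python
import numpy as np
import tensorflow as tf

# 2 sequences x 3 timesteps x 5 classes: logits have rank r = 3.
logits = tf.constant(np.random.randn(2, 3, 5).astype(np.float32))
labels = tf.constant([[0, 1, 4],
                      [2, 3, 3]])  # shape [2, 3], each entry in [0, 5)

loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
# loss has shape [2, 3]: one cross-entropy value per (sequence, timestep).
```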