Commit f0036364 authored by Eugene Brevdo, committed by TensorFlower Gardener

Sparse cross entropy loss now raises an error on invalid labels when run on CPU.

On GPU, it's hard to get error metrics back, so we continue to return NaNs.
Change: 133875596
Parent af780b8c
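In user-facing terms, the change looks like this (a minimal sketch against the TF 0.x-era positional Python API; the exact error text comes from the new kernel check in the diff below):

```python
import tensorflow as tf

logits = [[1., 2., 3., 4.],
          [1., 2., 3., 4.]]
labels = [0, 9]  # 9 is outside [0, 4), the valid class range

loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
with tf.Session() as sess:
  # CPU: now raises InvalidArgumentError ("Received a label value of 9 ...").
  # GPU: no error is reported; the second loss row comes back as NaN.
  sess.run(loss)
```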
@@ -17,17 +17,36 @@ limitations under the License.
 
 #define EIGEN_USE_THREADS
 
-#include "tensorflow/core/framework/op_kernel.h"
-#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
 #include "tensorflow/core/kernels/sparse_xent_op.h"
+
+#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/tensor.h"
+#include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/framework/tensor_types.h"
 
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
+template <typename Index>
+Status CheckInvalidLabelIndex(const Tensor& labels, int64 max_index) {
+  if (labels.NumElements() == 0) return Status::OK();
+  const auto label_values = labels.vec<Index>();
+  int64 bad_index;
+  auto min_max_dim_value = std::minmax_element(
+      label_values.data(), label_values.data() + label_values.size());
+  if (*min_max_dim_value.first < 0 || *min_max_dim_value.second >= max_index) {
+    bad_index = (*min_max_dim_value.first < 0) ? *min_max_dim_value.first
+                                               : *min_max_dim_value.second;
+    return errors::InvalidArgument("Received a label value of ", bad_index,
+                                   " which is outside the valid range of [0, ",
+                                   max_index, "). Label values: ",
+                                   labels.SummarizeValue(labels.NumElements()));
+  }
+  return Status::OK();
+}
+
 template <typename Device, typename T, typename Index>
 class SparseSoftmaxXentWithLogitsOp : public OpKernel {
  public:
@@ -66,6 +85,10 @@ class SparseSoftmaxXentWithLogitsOp : public OpKernel {
                    context->allocate_output(1, logits.shape(), &back_out));
     if (logits.dim_size(0) > 0) {
+      if (std::is_same<Device, CPUDevice>::value) {
+        OP_REQUIRES_OK(
+            context, CheckInvalidLabelIndex<Index>(labels, logits.dim_size(1)));
+      }
       functor::SparseXentFunctor<Device, T, Index> functor;
       functor(context->eigen_device<Device>(), logits.matrix<T>(),
               labels.vec<Index>(), scratch.vec<T>(), loss_out->vec<T>(),
...
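For reference, the new `CheckInvalidLabelIndex` check above boils down to a single min/max scan over the label vector, rejecting anything outside `[0, num_classes)`. A NumPy sketch of the same logic (the function name is hypothetical, not part of the commit):

```python
import numpy as np

def check_invalid_label_index(labels, max_index):
  # Empty label vectors are trivially valid, as in the kernel.
  labels = np.asarray(labels)
  if labels.size == 0:
    return
  lo, hi = labels.min(), labels.max()  # mirrors std::minmax_element
  if lo < 0 or hi >= max_index:
    bad_index = lo if lo < 0 else hi
    raise ValueError("Received a label value of %d which is outside the "
                     "valid range of [0, %d). Label values: %s"
                     % (bad_index, max_index, labels))

check_invalid_label_index([4, 3, 0, -1], max_index=4)  # raises ValueError
```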
@@ -72,18 +72,28 @@ class SparseXentTest(tf.test.TestCase):
                 [1., 2., 3., 4.],
                 [1., 2., 3., 4.]]
     labels = [4, 3, 0, -1]
-    with self.test_session(use_gpu=True) as sess:
-      loss, backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
-          features, labels)
-      tf_loss, tf_backprop = sess.run([loss, backprop])
-      self.assertAllClose(
-          [[np.nan] * 4,
-           [0.25, 0.25, 0.25, -0.75],
-           [-0.968, 0.087, 0.237, 0.6439],
-           [np.nan] * 4],
-          tf_backprop, rtol=1e-3, atol=1e-3)
-      self.assertAllClose(
-          [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3)
+    if tf.test.is_built_with_cuda() and tf.test.is_gpu_available():
+      with self.test_session(use_gpu=True) as sess:
+        loss, backprop = (
+            gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
+                features, labels))
+        tf_loss, tf_backprop = sess.run([loss, backprop])
+        self.assertAllClose(
+            [[np.nan] * 4,
+             [0.25, 0.25, 0.25, -0.75],
+             [-0.968, 0.087, 0.237, 0.6439],
+             [np.nan] * 4],
+            tf_backprop, rtol=1e-3, atol=1e-3)
+        self.assertAllClose(
+            [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3)
+    with self.test_session(use_gpu=False) as sess:
+      loss, backprop = (
+          gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
+              features, labels))
+      with self.assertRaisesOpError("Received a label value of"):
+        sess.run([loss, backprop])
 
   def testNpXent(self):
     # We create 2 batches of logits for testing.
...
@@ -560,7 +560,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
 
 def softmax(logits, dim=-1, name=None):
-  """Computes softmax activations.
+  """Computes log softmax activations.
 
   For each batch `i` and class `j` we have
 
@@ -587,7 +587,7 @@ def log_softmax(logits, dim=-1, name=None):
 
   For each batch `i` and class `j` we have
 
-      logsoftmax = logits - log(reduce_sum(exp(logits), dim))
+      logsoftmax = logits - reduce_sum(exp(logits), dim)
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
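Numerically, log softmax is log(softmax(logits)), i.e. logits - log(reduce_sum(exp(logits), dim)); the identity does need the log term. A small NumPy check, with the conventional max-shift for stability:

```python
import numpy as np

logits = np.array([1., 2., 3., 4.])
# log softmax identity: logsoftmax = logits - log(sum(exp(logits)))
# Subtracting the max first keeps exp() from overflowing.
shifted = logits - logits.max()
log_softmax = shifted - np.log(np.exp(shifted).sum())
assert np.allclose(np.exp(log_softmax).sum(), 1.0)  # softmax sums to 1
```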
@@ -716,12 +716,14 @@ def sparse_softmax_cross_entropy_with_logits(logits, labels, name=None):
   labels of shape `[batch_size]`. But higher dimensions are supported.
 
   Args:
     logits: Unscaled log probabilities of rank `r` and shape
       `[d_0, d_1, ..., d_{r-2}, num_classes]` and dtype `float32` or `float64`.
     labels: `Tensor` of shape `[d_0, d_1, ..., d_{r-2}]` and dtype `int32` or
       `int64`. Each entry in `labels` must be an index in `[0, num_classes)`.
-      Other values will result in a loss of 0, but incorrect gradient
-      computations.
+      Other values will raise an exception when this op is run on CPU, and
+      return `NaN` for the corresponding loss and gradient rows on GPU.
     name: A name for the operation (optional).
 
   Returns:
...
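The rank-`r` shapes documented above generalize the common `[batch_size, num_classes]` case, e.g. to per-timestep sequence labeling. A hedged sketch against the era's positional API, with shapes taken from the docstring:

```python
import numpy as np
import tensorflow as tf

# 2 sequences x 3 timesteps x 5 classes: logits have rank r = 3.
logits = tf.constant(np.random.randn(2, 3, 5).astype(np.float32))
labels = tf.constant([[0, 1, 4],
                      [2, 3, 3]])  # shape [2, 3], each entry in [0, 5)

loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, labels)
# loss has shape [2, 3]: one cross-entropy value per (sequence, timestep).
```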