Commit a216c03f authored by A. Unique TensorFlower, committed by TensorFlower Gardener

Automated rollback of commit 0fd8d482

PiperOrigin-RevId: 257384893
Parent 114b8e76
@@ -21,7 +21,7 @@ from __future__ import print_function
from absl.testing import parameterized
import numpy as np
from tensorflow.python.eager import backprop
from tensorflow.python.compat import compat
from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
@@ -339,27 +339,15 @@ class GatherTest(test.TestCase, parameterized.TestCase):
])
@test_util.run_in_graph_and_eager_modes
def testBatchDims(self, params, indices, batch_dims, expected=None,
axis=None, expected_gradient_shape=None):
axis=None):
result = array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims)
self.assertAllEqual(expected, result)
# Test the gradients shape.
if context.executing_eagerly():
with backprop.GradientTape() as tape:
zeros = array_ops.zeros_like(params, dtype=dtypes.float32)
tape.watch(zeros)
values = zeros * 2 + zeros
result = array_ops.gather(
values, indices, axis=axis, batch_dims=batch_dims)
gradients = tape.gradient(result, zeros)
else:
zeros = array_ops.zeros_like(params, dtype=dtypes.float32)
values = zeros * 2 + zeros
with compat.forward_compatibility_horizon(2019, 6, 11):
result = array_ops.gather(
values, indices, axis=axis, batch_dims=batch_dims)
gradients = gradients_impl.gradients(result, [zeros])[0]
params, indices, axis=axis, batch_dims=batch_dims)
self.assertAllEqual(array_ops.shape(params), array_ops.shape(gradients))
self.assertAllEqual(expected, result)
@parameterized.parameters([
dict(
@@ -455,6 +443,13 @@ class GatherTest(test.TestCase, parameterized.TestCase):
self.assertAllEqual(output_shape, result.shape.as_list())
self.assertAllEqual(expected, result)
with compat.forward_compatibility_horizon(2019, 6, 11):
result = array_ops.gather(
params, indices, axis=axis, batch_dims=batch_dims)
self.assertAllEqual(output_shape, result.shape.as_list())
self.assertAllEqual(expected, result)
def _batchNumpyGather(self, params, indices, axis, batch_dims):
"""Performs a batch gather by making recursive calls to np.take().
......
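For context, a gather with `batch_dims=b` runs an independent gather inside each of the leading `b` batch dimensions, which is what the test above and `_batchNumpyGather` verify. Below is a minimal NumPy sketch of that semantics for the common case `axis == batch_dims`; the helper name and shapes are made up for illustration and are not the test's implementation.

import numpy as np

def batch_gather_reference(params, indices, batch_dims):
    # Gather along the first non-batch axis, one batch element at a time.
    # Assumes axis == batch_dims, i.e. the gather happens right after the
    # batch dimensions.
    if batch_dims == 0:
        return np.take(params, indices, axis=0)
    # Recurse over the outermost batch dimension.
    return np.stack([batch_gather_reference(p, i, batch_dims - 1)
                     for p, i in zip(params, indices)])

params = np.arange(2 * 3 * 4).reshape(2, 3, 4)  # shape [2, 3, 4]
indices = np.array([[2, 0], [1, 1]])            # shape [2, 2]
out = batch_gather_reference(params, indices, batch_dims=1)
print(out.shape)  # (2, 2, 4): two rows gathered per batch element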
@@ -31,7 +31,6 @@ from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import control_flow_util
from tensorflow.python.ops import gen_array_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import gen_resource_variable_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops
@@ -478,61 +477,6 @@ def _GatherGrad(op, grad):
return [ops.IndexedSlices(values, indices, params_shape), None]
def _GetBatchIndices(params_shape, indices, batch_dims):
"""Addds the batch offsets to the given indices and returns the results."""
batch_indices = indices
indices_ndims = indices.shape.ndims
indices_dtype = indices.dtype.base_dtype
casted_params_shape = math_ops.cast(params_shape, indices_dtype)
accum_dim_value = array_ops.ones((), dtype=indices_dtype)
for dim in range(batch_dims, 0, -1):
dim_value = casted_params_shape[dim - 1]
accum_dim_value *= casted_params_shape[dim]
start = array_ops.zeros((), dtype=indices_dtype)
step = array_ops.ones((), dtype=indices_dtype)
dim_indices = math_ops.range(start, dim_value, step)
dim_indices *= accum_dim_value
dim_shape = array_ops.stack(
[1] * (dim - 1) + [dim_value] + [1] * (indices_ndims - dim), axis=0)
batch_indices += array_ops.reshape(dim_indices, dim_shape)
return batch_indices
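In the removed `_GetBatchIndices` above, the loop turns per-batch indices into indices into a params tensor whose batch dimensions have been flattened: each batch dimension contributes an offset of the batch index times the product of the dimensions between it and the gather axis. A NumPy sketch of the same offset arithmetic for a single batch dimension (toy shapes, illustrative only):

import numpy as np

# Assume params has shape [B, N, ...] and indices has shape [B, M].
B, N = 2, 5
indices = np.array([[0, 4, 2],
                    [1, 1, 3]])            # per-batch indices into an axis of size N

# Offset batch b's indices by b * N so they address the flattened [B * N, ...] params.
offsets = np.arange(B).reshape(B, 1) * N   # [[0], [5]]
flat_indices = indices + offsets
print(flat_indices)
# [[0 4 2]
#  [6 6 8]]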
def _BatchGatherGrad(
params_shape, values, indices, batch_dims, gather_dim_size):
"""Returns the gradient of GatherV2 with batch dimensions."""
# Axis is the first non-batch dimension.
indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
if batch_dims:
values_shape = array_ops.shape(values)
# Add the batch offsets to indices and flatten the batch dimensions.
outer_shape = values_shape[:batch_dims]
inner_shape = values_shape[batch_dims:][1:]
batch_size = gen_math_ops.prod(outer_shape, [0], False)
flat_values_shape = array_ops.concat([[-1], inner_shape], 0)
gather_dim_size *= batch_size
indices = _GetBatchIndices(params_shape, indices, batch_dims)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="Converting sparse IndexedSlices to a dense Tensor.*")
values = array_ops.reshape(values, flat_values_shape)
indices = array_ops.reshape(indices, indices_size)
params_grad = math_ops.unsorted_segment_sum(values, indices, gather_dim_size)
if batch_dims:
# Put back the batch dimensions.
params_grad = array_ops.reshape(
params_grad, array_ops.concat([outer_shape, flat_values_shape], 0))
return params_grad
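The key step in `_BatchGatherGrad` is that `unsorted_segment_sum` behaves as a scatter-add: every gradient slice that flowed out of `values[..., i, ...]` is accumulated back into `params_grad[..., indices[i], ...]`, so repeated indices sum their contributions. A minimal sketch of that behaviour for the batch-free case, assuming TF 2.x eager execution and made-up values:

import tensorflow as tf

params_size = 4                           # size of the gathered axis
indices = tf.constant([1, 3, 1])          # index 1 was gathered twice
grad = tf.constant([[1., 1.],
                    [2., 2.],
                    [3., 3.]])            # gradient w.r.t. the gather output

# Scatter-add each gradient row back onto the index it was gathered from.
params_grad = tf.math.unsorted_segment_sum(grad, indices, num_segments=params_size)
print(params_grad.numpy())
# [[0. 0.]
#  [4. 4.]    <- contributions for index 1 summed: [1, 1] + [3, 3]
#  [0. 0.]
#  [2. 2.]]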
@ops.RegisterGradient("GatherV2")
def _GatherV2Grad(op, grad):
"""Gradient for GatherV2 op."""
@@ -551,10 +495,6 @@ def _GatherV2Grad(op, grad):
indices_size = array_ops.expand_dims(array_ops.size(indices), 0)
axis = op.inputs[2]
axis_static = tensor_util.constant_value(axis)
batch_dims = int(op.get_attr("batch_dims"))
if batch_dims < 0:
batch_dims += indices.shape.ndims
# For axis 0 gathers, build an appropriately shaped IndexedSlices.
if axis_static == 0:
@@ -569,45 +509,44 @@ def _GatherV2Grad(op, grad):
message="Converting sparse IndexedSlices to a dense Tensor.*")
values = array_ops.reshape(grad, values_shape)
indices = array_ops.reshape(indices, indices_size)
params_grad = ops.IndexedSlices(values, indices, params_shape)
else:
# Handle axis by transposing the axis dimension to be the first non-batch
# dimension, compute the gradient and transpose the result back.
outer_shape = params_shape[:axis]
inner_shape = params_shape[axis:][1:]
values_shape = array_ops.concat([outer_shape, [-1], inner_shape], 0)
return [ops.IndexedSlices(values, indices, params_shape), None, None]
values_dims = array_ops.size(values_shape)
axis_dims = array_ops.size(outer_shape)
outer_shape = params_shape[:axis]
outer_dims = array_ops.size(outer_shape)
inner_shape = params_shape[axis:][1:]
inner_dims = array_ops.size(inner_shape)
outer_batches_indices = math_ops.range(batch_dims)
batch_axis_indices = math_ops.range(batch_dims, axis_dims)
inner_axes_indices = math_ops.range(axis_dims + 1, values_dims)
outer_axes_indices = math_ops.range(outer_dims)
inner_axes_indices = math_ops.range(outer_dims + 1,
outer_dims + 1 + inner_dims)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="Converting sparse IndexedSlices to a dense Tensor.*")
values = array_ops.reshape(grad, values_shape)
# Move values[axis] up to values[batch_dims]
transpose_dims = array_ops.concat(
[outer_batches_indices, [axis_dims], batch_axis_indices,
inner_axes_indices],
0)
values_transpose = array_ops.transpose(values, transpose_dims)
params_grad = _BatchGatherGrad(params_shape, values_transpose, indices,
batch_dims, params_shape[axis])
# Inverts the above transpose by moving dimension batch_dims back to its
# original position.
invert_transpose_dims = array_ops.concat(
[outer_batches_indices, batch_axis_indices + 1, [batch_dims],
inner_axes_indices],
0)
params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
values_shape = array_ops.concat([outer_shape, indices_size, inner_shape], 0)
with warnings.catch_warnings():
warnings.filterwarnings(
"ignore",
message="Converting sparse IndexedSlices to a dense Tensor.*")
values = array_ops.reshape(grad, values_shape)
indices = array_ops.reshape(indices, indices_size)
# We need to sum up every slice `values[..., i, ....]` corresponding to
# `params[..., indices[i], ...]`. Since `unsorted_segment_sum` does not
# support an axis parameter, we transpose the gather dimension to the front,
# then use `unsorted_segment_sum` to build a
# [gather_axis, outer_axes, inner_axes] tensor with all the gradients
# affecting each index in `gather_axis` summed up.
transpose_dims = array_ops.concat(
[[outer_dims], outer_axes_indices, inner_axes_indices], 0)
values_transpose = array_ops.transpose(values, transpose_dims)
num_segments = params_shape[axis]
params_grad = math_ops.unsorted_segment_sum(values_transpose, indices,
num_segments)
# Inverts the above transpose by moving dimension 0 back to its original
# position.
invert_transpose_dims = array_ops.concat(
[outer_axes_indices + 1, [0], inner_axes_indices], 0)
params_grad = array_ops.transpose(params_grad, invert_transpose_dims)
return [params_grad, None, None]
......
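For `axis != 0`, both the removed and the restored gradient code rely on the trick the comment above describes: `unsorted_segment_sum` only segments along dimension 0, so the gathered axis is transposed to the front, the gradients are scatter-added there, and the result is transposed back. A condensed sketch of that permutation under assumed shapes (TF 2.x eager, not the exact graph construction above):

import tensorflow as tf

# Assume params has shape [2, 5, 3] and we gathered 4 indices along axis=1.
indices = tf.constant([0, 2, 2, 4])
grad = tf.random.normal([2, 4, 3])        # gradient w.r.t. the gather output

# Move the gathered axis to the front: [4, 2, 3].
grad_t = tf.transpose(grad, [1, 0, 2])
# Scatter-add per index along the (now leading) gather axis: [5, 2, 3].
params_grad_t = tf.math.unsorted_segment_sum(grad_t, indices, num_segments=5)
# Undo the transpose so the gradient matches params: [2, 5, 3].
params_grad = tf.transpose(params_grad_t, [1, 0, 2])
print(params_grad.shape)  # (2, 5, 3)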
@@ -3807,19 +3807,36 @@ def gather(params,
A `Tensor`. Has the same type as `params`.
"""
del validate_indices
if compat.forward_compatible(2019, 8, 10):
if axis is None:
axis = batch_dims
if axis != 0:
return gen_array_ops.gather_v2(
params, indices, axis, batch_dims=batch_dims, name=name)
try:
# TODO(apassos) find a less bad way of detecting resource variables
# without introducing a circular dependency.
return params.sparse_read(indices, name=name)
except AttributeError:
return gen_array_ops.gather_v2(
params, indices, axis, name=name)
if batch_dims != 0:
with ops.name_scope(name, "Gather", [params, indices, axis]):
return _batch_gather(params, indices, batch_dims, axis)
if axis is None:
axis = batch_dims
if axis != 0:
return gen_array_ops.gather_v2(
params, indices, axis, batch_dims=batch_dims, name=name)
# Note that we do a sparse_read here to avoid snapshotting the entire
# resource variable and doing a gather, which can be inefficient and lead to
# subtle race conditions. TODO(apassos) implement axis != 0 on sparse_read
return gen_array_ops.gather_v2(params, indices, axis, name=name)
try:
# TODO(apassos) find a less bad way of detecting resource variables
# without introducing a circular dependency.
# TODO(apassos) find a less bad way of detecting resource variables without
# introducing a circular dependency.
return params.sparse_read(indices, name=name)
except AttributeError:
return gen_array_ops.gather_v2(
params, indices, axis, name=name)
return gen_array_ops.gather_v2(params, indices, axis, name=name)
@tf_export("gather", v1=[])
......
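As a usage note for the `gather` entry point above: `batch_dims=0` (the default) gives the classic gather, while a positive `batch_dims` treats the leading dimensions as independent batches and, when `axis` is omitted, gathers along the first non-batch dimension. A small sketch, assuming TF 2.x eager execution:

import tensorflow as tf

params = tf.reshape(tf.range(2 * 3 * 4), [2, 3, 4])
indices = tf.constant([[2, 0], [1, 1]])   # one index set per batch element

plain = tf.gather(params, indices)                  # shape [2, 2, 3, 4]
batched = tf.gather(params, indices, batch_dims=1)  # shape [2, 2, 4]
print(plain.shape, batched.shape)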