Commit 22e20f84 authored by Skirmantas Kligys, committed by Yongzhe Wang

Push CLs 283653640, 284011539 (#7921)

* Manually quantize selected inputs before summing them up.

PiperOrigin-RevId: 283653640
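
The gist of this change, as a rough sketch only (not the actual lstm_utils.quantize_op implementation; the helper name and the fixed quantization range below are illustrative assumptions):

```python
# Illustrative sketch of "quantize selected inputs before summing them up":
# mask the stacked inputs with the one-hot action, insert a fake-quant node,
# then reduce over the model axis. The real CL delegates the quantization to
# lstm_utils.quantize_op (signature per the diff below).
import tensorflow.compat.v1 as tf


def quantize_then_sum(stacked_inputs, action_index, is_quantized=True):
  """Masks stacked inputs by a one-hot action, fake-quantizes, then sums."""
  selected = stacked_inputs * action_index
  if is_quantized:
    # Fixed range chosen only for illustration; the real helper presumably
    # manages its own min/max state.
    selected = tf.quantization.fake_quant_with_min_max_args(
        selected, min=-6.0, max=6.0, num_bits=8)
  return tf.reduce_sum(selected, axis=-1)
```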

* Fix messy variable paths in LSTD head.

When a VariableScope object is passed as the parameter to tf.variable_scope(),
TF gets confused and places variables in scopes different from the ones used
for operations (variable_scope != name_scope). That leads to very messy
TF graphs.

Before CL:
  FeatureExtractor/LSTM/conv_lstm_cell/bottleneck_0/depthwise_weights
  FeatureExtractor/LSTM/LSTM/conv_lstm_cell/bottleneck_0/separable_conv2d/depthwise

After CL:
  FeatureExtractor/LSTM/conv_lstm_cell/bottleneck_0/depthwise_weights
  FeatureExtractor/LSTM/conv_lstm_cell/bottleneck_0/separable_conv2d/depthwise

PiperOrigin-RevId: 284011539
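
For context, a minimal repro sketch of the scope problem (not code from this CL; TF 1.x graph-mode semantics via tf.compat.v1, with illustrative names):

```python
# Re-entering an enclosing variable scope by passing its VariableScope object
# re-opens the name scope: ops pick up an extra "LSTM/" path component while
# variables do not, which is exactly the mismatch shown above.
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()

with tf.variable_scope('LSTM') as lstm_scope:
  # Old behavior in rnn_decoder: the captured scope object is passed back in.
  with tf.variable_scope(lstm_scope):
    v = tf.get_variable('w', shape=[1])
    read = tf.identity(v, name='read')

print(v.name)        # LSTM/w:0       -- variable scope is simply re-entered
print(read.op.name)  # LSTM/LSTM/read -- name scope is duplicated
```

Dropping the scope argument (scope=None plus the _NoVariableScope fallback in this CL) keeps ops and variables under the single scope already opened by the caller.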
Parent 8a8baded
@@ -16,6 +16,16 @@
 """Custom RNN decoder."""
 import tensorflow as tf
+import lstm_object_detection.lstm.utils as lstm_utils
+
+
+class _NoVariableScope(object):
+
+  def __enter__(self):
+    return
+
+  def __exit__(self, exc_type, exc_value, traceback):
+    return False
 def rnn_decoder(decoder_inputs,
@@ -38,7 +48,7 @@ def rnn_decoder(decoder_inputs,
       * prev is a 2D Tensor of shape [batch_size x output_size],
       * i is an integer, the step number (when advanced control is needed),
       * next is a 2D Tensor of shape [batch_size x input_size].
-    scope: VariableScope for the created subgraph; defaults to "rnn_decoder".
+    scope: optional VariableScope for the created subgraph.
   Returns:
     A tuple of the form (outputs, state), where:
       outputs: A list of the same length as decoder_inputs of 4D Tensors with
@@ -47,7 +57,7 @@ def rnn_decoder(decoder_inputs,
       cell at each time-step. It is a 2D Tensor of shape
       [batch_size x cell.state_size].
   """
-  with tf.variable_scope(scope or 'rnn_decoder'):
+  with tf.variable_scope(scope) if scope else _NoVariableScope():
     state_tuple = initial_state
     outputs = []
     states = []
@@ -100,7 +110,7 @@ def multi_input_rnn_decoder(decoder_inputs,
       Useful when input sequences have differing numbers of channels. Final
       bottlenecks will have the same dimension.
     flatten_state: Whether the LSTM state is flattened.
-    scope: VariableScope for the created subgraph; defaults to "rnn_decoder".
+    scope: optional VariableScope for the created subgraph.
   Returns:
     A tuple of the form (outputs, state), where:
       outputs: A list of the same length as decoder_inputs of 2D Tensors with
@@ -114,7 +124,7 @@ def multi_input_rnn_decoder(decoder_inputs,
   """
   if flatten_state and len(decoder_inputs[0]) > 1:
     raise ValueError('In export mode, unroll length should not be more than 1')
-  with tf.variable_scope(scope or 'rnn_decoder'):
+  with tf.variable_scope(scope) if scope else _NoVariableScope():
     state_tuple = initial_state
     outputs = []
     states = []
@@ -136,7 +146,9 @@ def multi_input_rnn_decoder(decoder_inputs,
       action = generate_action(selection_strategy, local_step, sequence_step,
                                [batch_size, 1, 1, 1])
-      inputs, _ = select_inputs(decoder_inputs, action, local_step)
+      inputs, _ = (
+          select_inputs(decoder_inputs, action, local_step, is_training,
+                        is_quantized))
       # Mark base network endpoints under raw_inputs/
       with tf.name_scope(None):
         inputs = tf.identity(inputs, 'raw_inputs/base_endpoint')
@@ -189,7 +201,8 @@ def generate_action(selection_strategy, local_step, sequence_step,
   return tf.cast(action, tf.int32)
-def select_inputs(decoder_inputs, action, local_step, get_alt_inputs=False):
+def select_inputs(decoder_inputs, action, local_step, is_training, is_quantized,
+                  get_alt_inputs=False):
   """Selects sequence from decoder_inputs based on 1D actions.
   Given multiple input batches, creates a single output batch by
@@ -199,7 +212,10 @@ def select_inputs(decoder_inputs, action, local_step, get_alt_inputs=False):
     decoder_inputs: A 2-D list of tensor inputs.
     action: A tensor of shape [batch_size]. Each element corresponds to an index
       of decoder_inputs to choose.
-    step: The current timestep.
+    local_step: The current timestep.
+    is_training: boolean, whether the network is training. When using learned
+      selection, attempts exploration if training.
+    is_quantized: flag to enable/disable quantization mode.
     get_alt_inputs: Whether the non-chosen inputs should also be returned.
   Returns:
@@ -216,13 +232,19 @@ def select_inputs(decoder_inputs, action, local_step, get_alt_inputs=False):
       [decoder_inputs[seq_index][local_step] for seq_index in range(num_seqs)],
       axis=-1)
   action_index = tf.one_hot(action, num_seqs)
-  inputs = tf.reduce_sum(stacked_inputs * action_index, axis=-1)
+  selected_inputs = (
+      lstm_utils.quantize_op(stacked_inputs * action_index, is_training,
+                             is_quantized, scope='quant_selected_inputs'))
+  inputs = tf.reduce_sum(selected_inputs, axis=-1)
   inputs_alt = None
   # Only works for 2 models.
   if get_alt_inputs:
     # Reverse of action_index.
     action_index_alt = tf.one_hot(action, num_seqs, on_value=0.0, off_value=1.0)
-    inputs_alt = tf.reduce_sum(stacked_inputs * action_index_alt, axis=-1)
+    selected_inputs = (
+        lstm_utils.quantize_op(stacked_inputs * action_index_alt, is_training,
+                               is_quantized, scope='quant_selected_inputs_alt'))
+    inputs_alt = tf.reduce_sum(selected_inputs, axis=-1)
   return inputs, inputs_alt
 def select_state(previous_state, new_state, action):
......
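
As an aside, the `with tf.variable_scope(scope) if scope else _NoVariableScope():` lines above rely on a conditional context manager. A sketch of the same pattern using the standard-library equivalent (contextlib.nullcontext, Python 3.7+; _NoVariableScope presumably plays this role for older interpreters), with illustrative names:

```python
# Same pattern in isolation: enter a real variable scope when one is given,
# otherwise enter a do-nothing context manager.
import contextlib

import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()


def build_weight(scope=None):
  with tf.variable_scope(scope) if scope else contextlib.nullcontext():
    return tf.get_variable('w', shape=[1])


print(build_weight().name)       # w:0
print(build_weight('foo').name)  # foo/w:0
```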
@@ -241,7 +241,7 @@ class LSTMSSDInterleavedMobilenetV2FeatureExtractor(
                        'not equal!')
     with slim.arg_scope(self._conv_hyperparams_fn()):
-      with tf.variable_scope('LSTM', reuse=self._reuse_weights) as lstm_scope:
+      with tf.variable_scope('LSTM', reuse=self._reuse_weights):
         output_size = (large_base_feature_shape[1], large_base_feature_shape[2])
         lstm_cell, init_state, step = self.create_lstm_cell(
             batch_size, output_size, state_saver, state_name)
@@ -257,9 +257,10 @@ class LSTMSSDInterleavedMobilenetV2FeatureExtractor(
           step,
           selection_strategy=self._interleave_method,
           is_training=self._is_training,
+          is_quantized=self._is_quantized,
           pre_bottleneck=self._pre_bottleneck,
           flatten_state=self._flatten_state,
-          scope=lstm_scope)
+          scope=None)
       self._states_out = states_out
       batcher_ops = None
......