Commit e121667d authored by: Justine Tunney Committed by: TensorFlower Gardener

Remove so many more hourglass imports

Change: 143230429
Parent 7815fcba
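An "hourglass" import is `import tensorflow as tf` followed by attribute lookups such as `tf.contrib.bayesflow.entropy`: every test funnels through the whole top-level package. The hunks below replace that with direct module imports and matching fine-grained BUILD deps. A minimal sketch of the before/after pattern, using modules that appear in this diff:

```
# Before: the hourglass -- everything is reached through the `tf` package.
import tensorflow as tf

x = tf.constant([1.0, 2.0])
loss = tf.reduce_sum(tf.square(x))

# After: import only the modules actually used, so each test's BUILD
# target can depend on exactly those modules and nothing more.
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import math_ops

x = constant_op.constant([1.0, 2.0])
loss = math_ops.reduce_sum(math_ops.square(x))
```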
......@@ -48,9 +48,8 @@ py_library(
"//tensorflow/contrib/solvers:solvers_py",
"//tensorflow/contrib/specs",
"//tensorflow/contrib/stat_summarizer:stat_summarizer_py",
"//tensorflow/contrib/tensor_forest:tensor_forest_ops_py",
"//tensorflow/contrib/tensor_forest:tensor_forest_py",
"//tensorflow/contrib/tensor_forest/hybrid:ops_lib",
"//tensorflow/contrib/tensor_forest:init_py",
"//tensorflow/contrib/tensor_forest/hybrid:ops_lib", # XXX: no ref but need for pip
"//tensorflow/contrib/tensorboard",
"//tensorflow/contrib/testing:testing_py",
"//tensorflow/contrib/tfprof",
......
......@@ -26,6 +26,8 @@ py_library(
"//tensorflow/python:training",
"//tensorflow/python:util",
"//tensorflow/python:variable_scope",
"//third_party/py/numpy",
"@six_archive//:six",
],
)
......@@ -35,9 +37,16 @@ cuda_py_test(
srcs = ["python/kernel_tests/entropy_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//third_party/py/numpy",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/contrib/layers:layers_py",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:nn_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:variables",
],
)
......@@ -47,9 +56,17 @@ cuda_py_test(
srcs = ["python/kernel_tests/stochastic_variables_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//third_party/py/numpy",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:random_ops",
"//tensorflow/python:variable_scope",
"//tensorflow/python:variables",
],
)
......@@ -59,8 +76,13 @@ cuda_py_test(
srcs = ["python/kernel_tests/monte_carlo_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/contrib/layers:layers_py",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
],
)
......@@ -71,9 +93,13 @@ cuda_py_test(
srcs = ["python/kernel_tests/special_math_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//third_party/py/numpy",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:gradients",
"//tensorflow/python:platform_test",
"//tensorflow/python:variables",
],
)
......@@ -83,8 +109,14 @@ cuda_py_test(
srcs = ["python/kernel_tests/stochastic_graph_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:control_flow_ops",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:gradients",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
],
)
......@@ -95,9 +127,15 @@ cuda_py_test(
srcs = ["python/kernel_tests/variational_inference_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/contrib/layers:layers_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:variables",
],
)
......@@ -107,7 +145,11 @@ cuda_py_test(
srcs = ["python/kernel_tests/stochastic_tensor_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//third_party/py/numpy",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:platform_test",
],
......@@ -119,9 +161,15 @@ cuda_py_test(
srcs = ["python/kernel_tests/stochastic_gradient_estimators_test.py"],
additional_deps = [
":bayesflow_py",
"//tensorflow:tensorflow_py",
"//third_party/py/numpy",
"//tensorflow/contrib/distributions:distributions_py",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:gradients",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:variables",
],
)
......
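Several of the rewritten tests below also gain a temporary dlopen() workaround, tracked as TensorFlow issue #6568. The rationale is inferred from the TODO rather than stated in this diff: once tests load ops from several separate extension modules instead of one monolithic `tf`, the interpreter needs RTLD_GLOBAL so shared C++ symbols resolve across those modules. A commented sketch of what the guarded hack does:

```
import ctypes
import sys

# Only interpreters on platforms with dlopen expose these hooks, hence the
# guard (it is a no-op elsewhere, e.g. on Windows).
if hasattr(sys, "getdlopenflags") and hasattr(sys, "setdlopenflags"):
  # OR in RTLD_GLOBAL so symbols from modules loaded after this point are
  # visible to one another (e.g. contrib op kernels against the framework).
  sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
```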
......@@ -18,12 +18,28 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
# TODO: #6568 Remove this hack that makes dlopen() not crash.
if hasattr(sys, 'getdlopenflags') and hasattr(sys, 'setdlopenflags'):
import ctypes
sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
import numpy as np
import tensorflow as tf
distributions = tf.contrib.distributions
layers = tf.contrib.layers
entropy = tf.contrib.bayesflow.entropy
from tensorflow.contrib import distributions as distributions_lib
from tensorflow.contrib import layers as layers_lib
from tensorflow.contrib.bayesflow.python.ops import entropy as entropy_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
distributions = distributions_lib
layers = layers_lib
entropy = entropy_lib
class NormalNoEntropy(distributions.Normal): # pylint: disable=no-init
......@@ -34,10 +50,10 @@ class NormalNoEntropy(distributions.Normal): # pylint: disable=no-init
def get_train_op(scalar_loss, optimizer='SGD', learning_rate=1.0, decay=0.0):
global_step = tf.Variable(0)
global_step = variables.Variable(0)
def decay_fn(rate, t):
return rate * (1 + tf.to_float(t))**(-decay)
return rate * (1 + math_ops.to_float(t))**(-decay)
train_op = layers.optimize_loss(
scalar_loss,
......@@ -59,7 +75,7 @@ def _assert_monotonic_increasing(array, atol=1e-5):
np.testing.assert_array_less(-1 * atol, diff)
class ElboRatioTest(tf.test.TestCase):
class ElboRatioTest(test.TestCase):
"""Show sampling converges to true KL values."""
def setUp(self):
......@@ -142,7 +158,7 @@ class ElboRatioTest(tf.test.TestCase):
self.assertAllClose(np.zeros(2), sample_kl.eval())
class EntropyShannonTest(tf.test.TestCase):
class EntropyShannonTest(test.TestCase):
def test_normal_entropy_default_form_uses_exact_entropy(self):
with self.test_session():
......@@ -176,7 +192,7 @@ class EntropyShannonTest(tf.test.TestCase):
self.assertAllClose(exact_entropy.eval(), mc_entropy.eval(), rtol=0.01)
# Make sure there is some error, proving we used samples
self.assertLess(0.0001, tf.abs(exact_entropy - mc_entropy).eval())
self.assertLess(0.0001, math_ops.abs(exact_entropy - mc_entropy).eval())
def test_default_entropy_falls_back_on_sample_if_analytic_not_available(self):
# Tested by showing we get a good answer that is not exact.
......@@ -197,10 +213,10 @@ class EntropyShannonTest(tf.test.TestCase):
self.assertAllClose(exact_entropy.eval(), mc_entropy.eval(), rtol=0.01)
# Make sure there is some error, proving we used samples
self.assertLess(0.0001, tf.abs(exact_entropy - mc_entropy).eval())
self.assertLess(0.0001, math_ops.abs(exact_entropy - mc_entropy).eval())
class RenyiRatioTest(tf.test.TestCase):
class RenyiRatioTest(test.TestCase):
"""Show renyi_ratio is minimized when the distributions match."""
def setUp(self):
......@@ -216,22 +232,23 @@ class RenyiRatioTest(tf.test.TestCase):
target = distributions.MultivariateNormalCholesky(mu_true, chol_true)
# Set up q distribution by defining mean/covariance as Variables
mu = tf.Variable(np.zeros(mu_true.shape), dtype=mu_true.dtype, name='mu')
mat = tf.Variable(
mu = variables.Variable(
np.zeros(mu_true.shape), dtype=mu_true.dtype, name='mu')
mat = variables.Variable(
np.zeros(chol_true.shape), dtype=chol_true.dtype, name='mat')
chol = distributions.matrix_diag_transform(mat, transform=tf.nn.softplus)
chol = distributions.matrix_diag_transform(mat, transform=nn_ops.softplus)
q = distributions.MultivariateNormalCholesky(mu, chol)
for alpha in [0.25, 0.75]:
negative_renyi_divergence = entropy.renyi_ratio(
log_p=target.log_prob, q=q, n=n, alpha=alpha, seed=0)
train_op = get_train_op(
tf.reduce_mean(-negative_renyi_divergence),
math_ops.reduce_mean(-negative_renyi_divergence),
optimizer='SGD',
learning_rate=0.5,
decay=0.1)
tf.global_variables_initializer().run()
variables.global_variables_initializer().run()
renyis = []
for step in range(1000):
sess.run(train_op)
......@@ -262,12 +279,12 @@ class RenyiRatioTest(tf.test.TestCase):
self.assertAllClose(np.zeros(2), negative_renyi_divergence.eval())
class RenyiAlphaTest(tf.test.TestCase):
class RenyiAlphaTest(test.TestCase):
def test_with_three_alphas(self):
with self.test_session():
for dtype in (tf.float32, tf.float64):
alpha_min = tf.constant(0.0, dtype=dtype)
for dtype in (dtypes.float32, dtypes.float64):
alpha_min = constant_op.constant(0.0, dtype=dtype)
alpha_max = 0.5
decay_time = 3
......@@ -334,4 +351,4 @@ class RenyiAlphaTest(tf.test.TestCase):
if __name__ == '__main__':
tf.test.main()
test.main()
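entropy_shannon falls back on a sampled estimate when no analytic entropy is available, which is what the "proving we used samples" assertions above check. A hedged numpy sketch of that Monte Carlo estimate for a Normal, against the closed form:

```
import numpy as np

rng = np.random.RandomState(0)
mu, sigma = 0.5, 2.0
z = rng.normal(mu, sigma, size=200000)

# Monte Carlo entropy: H(p) = -E_p[log p(Z)], averaged over samples of p.
log_prob = -0.5 * np.log(2 * np.pi * sigma**2) - (z - mu)**2 / (2 * sigma**2)
mc_entropy = -np.mean(log_prob)

# Closed form for a Normal: 0.5 * log(2 * pi * e * sigma^2).
exact_entropy = 0.5 * np.log(2 * np.pi * np.e * sigma**2)
assert abs(mc_entropy - exact_entropy) / exact_entropy < 0.01
```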
......@@ -18,22 +18,36 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
import sys
distributions = tf.contrib.distributions
layers = tf.contrib.layers
monte_carlo = tf.contrib.bayesflow.monte_carlo
# TODO: #6568 Remove this hack that makes dlopen() not crash.
if hasattr(sys, 'getdlopenflags') and hasattr(sys, 'setdlopenflags'):
import ctypes
sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
from tensorflow.contrib import distributions as distributions_lib
from tensorflow.contrib import layers as layers_lib
from tensorflow.contrib.bayesflow.python.ops import monte_carlo as monte_carlo_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import random_seed
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
class ExpectationImportanceSampleTest(tf.test.TestCase):
distributions = distributions_lib
layers = layers_lib
monte_carlo = monte_carlo_lib
class ExpectationImportanceSampleTest(test.TestCase):
def test_normal_integral_mean_and_var_correctly_estimated(self):
n = int(1e6)
with self.test_session():
mu_p = tf.constant([-1.0, 1.0], dtype=tf.float64)
mu_q = tf.constant([0.0, 0.0], dtype=tf.float64)
sigma_p = tf.constant([0.5, 0.5], dtype=tf.float64)
sigma_q = tf.constant([1.0, 1.0], dtype=tf.float64)
mu_p = constant_op.constant([-1.0, 1.0], dtype=dtypes.float64)
mu_q = constant_op.constant([0.0, 0.0], dtype=dtypes.float64)
sigma_p = constant_op.constant([0.5, 0.5], dtype=dtypes.float64)
sigma_q = constant_op.constant([1.0, 1.0], dtype=dtypes.float64)
p = distributions.Normal(mu=mu_p, sigma=sigma_p)
q = distributions.Normal(mu=mu_q, sigma=sigma_q)
......@@ -43,13 +57,9 @@ class ExpectationImportanceSampleTest(tf.test.TestCase):
# Compute E_p[X^2].
e_x2 = monte_carlo.expectation_importance_sampler(
f=tf.square,
log_p=p.log_prob,
sampling_dist_q=q,
n=n,
seed=42)
f=math_ops.square, log_p=p.log_prob, sampling_dist_q=q, n=n, seed=42)
stdev = tf.sqrt(e_x2 - tf.square(e_x))
stdev = math_ops.sqrt(e_x2 - math_ops.square(e_x))
# Relative tolerance (rtol) chosen 2 times as large as minimum needed to
# pass.
......@@ -72,8 +82,8 @@ class ExpectationImportanceSampleTest(tf.test.TestCase):
# Compute E_p[X_1 * X_2 > 0], with X_i the ith component of X ~ p(x).
# Should equal 1/2 because p is a spherical Gaussian centered at (0, 0).
def indicator(x):
x1_times_x2 = tf.reduce_prod(x, reduction_indices=[-1])
return 0.5 * (tf.sign(x1_times_x2) + 1.0)
x1_times_x2 = math_ops.reduce_prod(x, reduction_indices=[-1])
return 0.5 * (math_ops.sign(x1_times_x2) + 1.0)
prob = monte_carlo.expectation_importance_sampler(
f=indicator, log_p=p.log_prob, sampling_dist_q=q, n=n, seed=42)
......@@ -85,28 +95,28 @@ class ExpectationImportanceSampleTest(tf.test.TestCase):
self.assertAllClose(0.5, prob.eval(), rtol=0.05)
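expectation_importance_sampler estimates E_p[f(X)] by drawing from the proposal q and reweighting by p/q. A minimal numpy sketch of the same kind of estimator, reusing one component of the first test's parameters (E_p[X^2] = mu^2 + sigma^2 = 1.25):

```
import numpy as np
from scipy import stats

rng = np.random.RandomState(42)
n = int(1e6)
p = stats.norm(loc=-1.0, scale=0.5)  # target distribution
q = stats.norm(loc=0.0, scale=1.0)   # sampling (proposal) distribution

z = q.rvs(size=n, random_state=rng)
weights = np.exp(p.logpdf(z) - q.logpdf(z))

# Importance-weighted estimate of the second moment under p.
e_x2 = np.mean(weights * z**2)
assert abs(e_x2 - 1.25) / 1.25 < 0.02
```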
class ExpectationImportanceSampleLogspaceTest(tf.test.TestCase):
class ExpectationImportanceSampleLogspaceTest(test.TestCase):
def test_normal_distribution_second_moment_estimated_correctly(self):
# Test the importance sampled estimate against an analytical result.
n = int(1e6)
with self.test_session():
mu_p = tf.constant([0.0, 0.0], dtype=tf.float64)
mu_q = tf.constant([-1.0, 1.0], dtype=tf.float64)
sigma_p = tf.constant([1.0, 2 / 3.], dtype=tf.float64)
sigma_q = tf.constant([1.0, 1.0], dtype=tf.float64)
mu_p = constant_op.constant([0.0, 0.0], dtype=dtypes.float64)
mu_q = constant_op.constant([-1.0, 1.0], dtype=dtypes.float64)
sigma_p = constant_op.constant([1.0, 2 / 3.], dtype=dtypes.float64)
sigma_q = constant_op.constant([1.0, 1.0], dtype=dtypes.float64)
p = distributions.Normal(mu=mu_p, sigma=sigma_p)
q = distributions.Normal(mu=mu_q, sigma=sigma_q)
# Compute E_p[X^2].
# Should equal [1, (2/3)^2]
log_e_x2 = monte_carlo.expectation_importance_sampler_logspace(
log_f=lambda x: tf.log(tf.square(x)),
log_f=lambda x: math_ops.log(math_ops.square(x)),
log_p=p.log_prob,
sampling_dist_q=q,
n=n,
seed=42)
e_x2 = tf.exp(log_e_x2)
e_x2 = math_ops.exp(log_e_x2)
# Relative tolerance (rtol) chosen 2 times as large as minimum needed to
# pass.
......@@ -114,18 +124,18 @@ class ExpectationImportanceSampleLogspaceTest(tf.test.TestCase):
self.assertAllClose([1., (2 / 3.)**2], e_x2.eval(), rtol=0.02)
class ExpectationTest(tf.test.TestCase):
class ExpectationTest(test.TestCase):
def test_mc_estimate_of_normal_mean_and_variance_is_correct_vs_analytic(self):
tf.set_random_seed(0)
random_seed.set_random_seed(0)
n = 20000
with self.test_session():
p = distributions.Normal(mu=[1.0, -1.0], sigma=[0.3, 0.5])
# Compute E_p[X] and E_p[X^2].
z = p.sample_n(n=n)
e_x = monte_carlo.expectation(lambda x: x, p, z=z, seed=42)
e_x2 = monte_carlo.expectation(tf.square, p, z=z, seed=0)
var = e_x2 - tf.square(e_x)
e_x2 = monte_carlo.expectation(math_ops.square, p, z=z, seed=0)
var = e_x2 - math_ops.square(e_x)
self.assertEqual(p.get_batch_shape(), e_x.get_shape())
self.assertEqual(p.get_batch_shape(), e_x2.get_shape())
......@@ -136,7 +146,7 @@ class ExpectationTest(tf.test.TestCase):
self.assertAllClose(p.variance().eval(), var.eval(), rtol=0.02)
class GetSamplesTest(tf.test.TestCase):
class GetSamplesTest(test.TestCase):
"""Test the private method 'get_samples'."""
def test_raises_if_both_z_and_n_are_none(self):
......@@ -177,4 +187,4 @@ class GetSamplesTest(tf.test.TestCase):
if __name__ == '__main__':
tf.test.main()
test.main()
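The logspace variant exercised above does the same computation without leaving log-space: roughly, log E_p[f(X)] is estimated as logsumexp(log f + log p - log q) - log n over proposal samples, which keeps extreme weights from under- or overflowing. A hedged sketch for the standard normal second moment (E[X^2] = 1):

```
import numpy as np
from scipy import stats
from scipy.special import logsumexp

rng = np.random.RandomState(42)
n = int(1e6)
p = stats.norm(loc=0.0, scale=1.0)   # target
q = stats.norm(loc=-1.0, scale=1.0)  # proposal

z = q.rvs(size=n, random_state=rng)
# log E_p[X^2] ~= logsumexp(log f(z) + log p(z) - log q(z)) - log n.
log_e_x2 = logsumexp(np.log(z**2) + p.logpdf(z) - q.logpdf(z)) - np.log(n)
assert abs(np.exp(log_e_x2) - 1.0) < 0.02
```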
......@@ -23,9 +23,15 @@ import collections
import numpy as np
from scipy import special
from scipy import stats
import tensorflow as tf
sm = tf.contrib.bayesflow.special_math
from tensorflow.contrib.bayesflow.python.ops import special_math
from tensorflow.python.framework import ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
sm = special_math
def _check_strictly_increasing(array_1d):
......@@ -37,8 +43,7 @@ def _make_grid(dtype, grid_spec):
"""Returns a uniform grid + noise, reshaped to shape argument."""
rng = np.random.RandomState(0)
num_points = np.prod(grid_spec.shape)
grid = np.linspace(
grid_spec.min, grid_spec.max, num=num_points).astype(dtype)
grid = np.linspace(grid_spec.min, grid_spec.max, num=num_points).astype(dtype)
grid_spacing = (grid_spec.max - grid_spec.min) / num_points
grid += 0.1 * grid_spacing * rng.randn(*grid.shape)
# More useful if it's sorted (e.g. for testing monotonicity, or debugging).
......@@ -48,11 +53,10 @@ def _make_grid(dtype, grid_spec):
GridSpec = collections.namedtuple("GridSpec", ["min", "max", "shape"])
ErrorSpec = collections.namedtuple("ErrorSpec", ["rtol", "atol"])
class NdtrTest(tf.test.TestCase):
class NdtrTest(test.TestCase):
_use_log = False
# Grid min/max chosen to ensure 0 < cdf(x) < 1.
_grid32 = GridSpec(min=-12.9, max=5., shape=[100])
......@@ -83,9 +87,11 @@ class NdtrTest(tf.test.TestCase):
expected = special.log_ndtr(grid)
# Scipy prematurely goes to zero at some places that we don't. So don't
# include these in the comparison.
self.assertAllClose(expected.astype(np.float64)[expected < 0],
actual.astype(np.float64)[expected < 0],
rtol=error_spec.rtol, atol=error_spec.atol)
self.assertAllClose(
expected.astype(np.float64)[expected < 0],
actual.astype(np.float64)[expected < 0],
rtol=error_spec.rtol,
atol=error_spec.atol)
def _test_grid_no_log(self, dtype, grid_spec, error_spec):
with self.test_session():
......@@ -104,9 +110,11 @@ class NdtrTest(tf.test.TestCase):
expected = special.ndtr(grid)
# Scipy prematurely goes to zero at some places that we don't. So don't
# include these in the comparison.
self.assertAllClose(expected.astype(np.float64)[expected < 0],
actual.astype(np.float64)[expected < 0],
rtol=error_spec.rtol, atol=error_spec.atol)
self.assertAllClose(
expected.astype(np.float64)[expected < 0],
actual.astype(np.float64)[expected < 0],
rtol=error_spec.rtol,
atol=error_spec.atol)
def test_float32(self):
self._test_grid(np.float32, self._grid32, self._error32)
......@@ -130,13 +138,9 @@ class LogNdtrTestLower(NdtrTest):
class LogNdtrTestMid(NdtrTest):
_use_log = True
_grid32 = GridSpec(
min=sm.LOGNDTR_FLOAT32_LOWER,
max=sm.LOGNDTR_FLOAT32_UPPER,
shape=[100])
min=sm.LOGNDTR_FLOAT32_LOWER, max=sm.LOGNDTR_FLOAT32_UPPER, shape=[100])
_grid64 = GridSpec(
min=sm.LOGNDTR_FLOAT64_LOWER,
max=sm.LOGNDTR_FLOAT64_UPPER,
shape=[100])
min=sm.LOGNDTR_FLOAT64_LOWER, max=sm.LOGNDTR_FLOAT64_UPPER, shape=[100])
# Differences show up as soon as we're in the tail, so add some atol.
_error32 = ErrorSpec(rtol=0.1, atol=1e-7)
_error64 = ErrorSpec(rtol=0.1, atol=1e-7)
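For reference, sm.ndtr is the standard normal CDF and sm.log_ndtr its log; the LOGNDTR_FLOAT*_LOWER/UPPER constants used above mark where the implementation switches approximation regimes in the tails. The underlying identity, stated against scipy:

```
import numpy as np
from scipy import special

x = np.linspace(-5.0, 5.0, 11)
# ndtr(x) = P(Z <= x), Z ~ N(0, 1), which equals 0.5 * erfc(-x / sqrt(2)).
np.testing.assert_allclose(
    special.ndtr(x), 0.5 * special.erfc(-x / np.sqrt(2.0)), rtol=1e-12)
```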
......@@ -156,7 +160,7 @@ class LogNdtrTestUpper(NdtrTest):
_error64 = ErrorSpec(rtol=1e-6, atol=1e-14)
class NdtrGradientTest(tf.test.TestCase):
class NdtrGradientTest(test.TestCase):
_use_log = False
_grid = GridSpec(min=-100., max=100., shape=[1, 2, 3, 8])
_error32 = ErrorSpec(rtol=1e-4, atol=0)
......@@ -170,16 +174,16 @@ class NdtrGradientTest(tf.test.TestCase):
def _test_grad_finite(self, dtype):
with self.test_session():
x = tf.Variable([-100., 0., 100.], dtype=dtype)
x = variables.Variable([-100., 0., 100.], dtype=dtype)
output = (sm.log_ndtr(x) if self._use_log else sm.ndtr(x))
grad_output = tf.gradients(output, x)
tf.global_variables_initializer().run()
grad_output = gradients_impl.gradients(output, x)
variables.global_variables_initializer().run()
self.assert_all_true(np.isfinite(output.eval()))
self.assert_all_true(np.isfinite(grad_output[0].eval()))
def _test_grad_accuracy(self, dtype, grid_spec, error_spec):
raw_grid = _make_grid(dtype, grid_spec)
grid = tf.convert_to_tensor(raw_grid)
grid = ops.convert_to_tensor(raw_grid)
with self.test_session():
fn = sm.log_ndtr if self._use_log else sm.ndtr
......@@ -189,8 +193,9 @@ class NdtrGradientTest(tf.test.TestCase):
# diagonal to be nonzero.
# TODO(b/31131137): Replace tf.test.compute_gradient with our own custom
# gradient evaluation to ensure we correctly handle small function delta.
grad_eval, _ = tf.test.compute_gradient(
grid, grid_spec.shape, fn(grid), grid_spec.shape)
grad_eval, _ = gradient_checker.compute_gradient(grid, grid_spec.shape,
fn(grid),
grid_spec.shape)
grad_eval = np.diag(grad_eval)
# Check for NaN separately in order to get informative failures.
......@@ -201,11 +206,11 @@ class NdtrGradientTest(tf.test.TestCase):
# Do the same checks but explicitly compute the gradient.
# (We did this because we're not sure if we trust
# tf.test.compute_gradient.)
grad_eval = tf.gradients(fn(grid), grid)[0].eval()
grad_eval = gradients_impl.gradients(fn(grid), grid)[0].eval()
self.assert_all_false(np.isnan(grad_eval))
if self._use_log:
g = np.reshape(grad_eval, [-1])
half = np.ceil(len(g)/2)
half = np.ceil(len(g) / 2)
self.assert_all_true(g[:half] > 0.)
self.assert_all_true(g[half:] >= 0.)
else:
......@@ -221,9 +226,11 @@ class NdtrGradientTest(tf.test.TestCase):
expected[np.isnan(expected)] = 0.
# Scipy prematurely goes to zero at some places that we don't. So don't
# include these in the comparison.
self.assertAllClose(expected.astype(np.float64)[expected < 0],
grad_eval.astype(np.float64)[expected < 0],
rtol=error_spec.rtol, atol=error_spec.atol)
self.assertAllClose(
expected.astype(np.float64)[expected < 0],
grad_eval.astype(np.float64)[expected < 0],
rtol=error_spec.rtol,
atol=error_spec.atol)
def test_float32(self):
self._test_grad_accuracy(np.float32, self._grid, self._error32)
......@@ -239,4 +246,4 @@ class LogNdtrGradientTest(NdtrGradientTest):
if __name__ == "__main__":
tf.test.main()
test.main()
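NdtrGradientTest leans on d/dx ndtr(x) = phi(x), the standard normal density; for log_ndtr the chain rule gives phi(x)/Phi(x), which is positive in exact arithmetic but can underflow to zero far in the right tail, hence the g[half:] >= 0 assertion above. A quick finite-difference check of the ndtr case:

```
import numpy as np
from scipy import stats

x = np.linspace(-3.0, 3.0, 7)
eps = 1e-6
# The central difference of the CDF should match the density.
fd = (stats.norm.cdf(x + eps) - stats.norm.cdf(x - eps)) / (2 * eps)
np.testing.assert_allclose(fd, stats.norm.pdf(x), rtol=1e-4)
```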
......@@ -19,11 +19,21 @@ from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
st = tf.contrib.bayesflow.stochastic_tensor
sge = tf.contrib.bayesflow.stochastic_gradient_estimators
dists = tf.contrib.distributions
from tensorflow.contrib import distributions
from tensorflow.contrib.bayesflow.python.ops import stochastic_gradient_estimators
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
st = stochastic_tensor
sge = stochastic_gradient_estimators
dists = distributions
def _vimco(loss):
......@@ -38,11 +48,10 @@ def _vimco(loss):
learning_signal = []
for j in range(n):
learning_signal.append(
np.sum([loss[i, :] for i in range(n) if i != j], 0))
learning_signal.append(np.sum([loss[i, :] for i in range(n) if i != j], 0))
learning_signal = np.array(learning_signal)
local_learning_signal = np.log(1/n * (learning_signal + geometric_mean))
local_learning_signal = np.log(1 / n * (learning_signal + geometric_mean))
# log_mean - local_learning_signal
log_mean = np.log(np.mean(loss, 0))
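_vimco is only partially visible in this diff; the elided lines evidently build the per-sample leave-one-out geometric means that `geometric_mean` refers to. A hedged numpy reconstruction of the whole helper, under that assumption:

```
import numpy as np

def vimco_advantage(loss):
  """VIMCO advantage sketch; `loss` has n samples along axis 0."""
  n = loss.shape[0]
  log_loss = np.log(loss)
  # Leave-one-out geometric mean over the other n - 1 losses (assumed to
  # be what the elided lines compute).
  geometric_mean = np.array([
      np.exp(np.mean([log_loss[i] for i in range(n) if i != j], 0))
      for j in range(n)])
  # Leave-one-out arithmetic sum, as in the visible lines above.
  learning_signal = np.array([
      np.sum([loss[i] for i in range(n) if i != j], 0) for j in range(n)])
  local_learning_signal = np.log(1 / n * (learning_signal + geometric_mean))
  # Advantage: log-mean of all losses minus the local learning signal.
  return np.log(np.mean(loss, 0)) - local_learning_signal
```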
......@@ -51,38 +60,38 @@ def _vimco(loss):
return advantage
class StochasticGradientEstimatorsTest(tf.test.TestCase):
class StochasticGradientEstimatorsTest(test.TestCase):
def setUp(self):
self._p = tf.constant(0.999999)
self._final_loss = tf.constant(3.2)
self._p = constant_op.constant(0.999999)
self._final_loss = constant_op.constant(3.2)
def _testScoreFunction(self, loss_fn, expected):
x = st.StochasticTensor(dists.Bernoulli(p=self._p), loss_fn=loss_fn)
sf = x.loss(self._final_loss)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
self.assertAllClose(*sess.run([expected, sf]))
def testScoreFunction(self):
expected = tf.log(self._p) * self._final_loss
expected = math_ops.log(self._p) * self._final_loss
self._testScoreFunction(sge.score_function, expected)
def testScoreFunctionWithConstantBaseline(self):
b = tf.constant(9.8)
expected = tf.log(self._p) * (self._final_loss - b)
b = constant_op.constant(9.8)
expected = math_ops.log(self._p) * (self._final_loss - b)
self._testScoreFunction(
sge.get_score_function_with_constant_baseline(b), expected)
def testScoreFunctionWithBaselineFn(self):
b = tf.constant(9.8)
b = constant_op.constant(9.8)
def baseline_fn(stoch_tensor, loss):
self.assertTrue(isinstance(stoch_tensor, st.StochasticTensor))
self.assertTrue(isinstance(loss, tf.Tensor))
self.assertTrue(isinstance(loss, ops.Tensor))
return b
expected = tf.log(self._p) * (self._final_loss - b)
expected = math_ops.log(self._p) * (self._final_loss - b)
self._testScoreFunction(
sge.get_score_function_with_baseline(baseline_fn), expected)
......@@ -103,23 +112,23 @@ class StochasticGradientEstimatorsTest(tf.test.TestCase):
# Baseline is EMA with bias correction
bias_correction = 1. - ema_decay**num_steps
baseline = ema / bias_correction
expected = tf.log(self._p) * (self._final_loss - baseline)
expected = math_ops.log(self._p) * (self._final_loss - baseline)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
for _ in range(num_steps - 1):
sess.run(sf) # run to update EMA
self.assertAllClose(*sess.run([expected, sf]))
def testScoreFunctionWithAdvantageFn(self):
b = tf.constant(9.8)
b = constant_op.constant(9.8)
def advantage_fn(stoch_tensor, loss):
self.assertTrue(isinstance(stoch_tensor, st.StochasticTensor))
self.assertTrue(isinstance(loss, tf.Tensor))
self.assertTrue(isinstance(loss, ops.Tensor))
return loss - b
expected = tf.log(self._p) * (self._final_loss - b)
expected = math_ops.log(self._p) * (self._final_loss - b)
self._testScoreFunction(
sge.get_score_function_with_advantage(advantage_fn), expected)
......@@ -130,14 +139,14 @@ class StochasticGradientEstimatorsTest(tf.test.TestCase):
[1e-6, 1e4],
[2.0, 3.0]])
# random_loss: (100, 50, 64) with 100 samples, batch shape (50, 64)
random_loss = 100*np.random.rand(100, 50, 64)
random_loss = 100 * np.random.rand(100, 50, 64)
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=False)
with self.test_session() as sess:
for loss in [simple_loss, random_loss]:
expected = _vimco(loss)
loss_t = tf.constant(loss, dtype=tf.float32)
loss_t = constant_op.constant(loss, dtype=dtypes.float32)
advantage_t = advantage_fn(None, loss_t) # ST is not used
advantage = sess.run(advantage_t)
self.assertEqual(expected.shape, advantage_t.get_shape())
......@@ -151,24 +160,26 @@ class StochasticGradientEstimatorsTest(tf.test.TestCase):
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=True)
with self.test_session():
loss_t = tf.constant(loss, dtype=tf.float64)
loss_t = constant_op.constant(loss, dtype=dtypes.float64)
advantage_t = advantage_fn(None, loss_t) # ST is not used
gradient_error = tf.test.compute_gradient_error(
loss_t, loss_t.get_shape().as_list(),
advantage_t, advantage_t.get_shape().as_list(),
gradient_error = gradient_checker.compute_gradient_error(
loss_t,
loss_t.get_shape().as_list(),
advantage_t,
advantage_t.get_shape().as_list(),
x_init_value=loss)
self.assertLess(gradient_error, 1e-3)
def testVIMCOAdvantageWithSmallProbabilities(self):
theta_value = np.random.rand(10, 100000)
# Test with float16 dtype to ensure stability even in this extreme case.
theta = tf.constant(theta_value, dtype=tf.float16)
theta = constant_op.constant(theta_value, dtype=dtypes.float16)
advantage_fn = sge.get_vimco_advantage_fn(have_log_loss=True)
with self.test_session() as sess:
log_loss = -tf.reduce_sum(theta, [1])
log_loss = -math_ops.reduce_sum(theta, [1])
advantage_t = advantage_fn(None, log_loss)
grad_t = tf.gradients(advantage_t, theta)[0]
grad_t = gradients_impl.gradients(advantage_t, theta)[0]
advantage, grad = sess.run((advantage_t, grad_t))
self.assertTrue(np.all(np.isfinite(advantage)))
self.assertTrue(np.all(np.isfinite(grad)))
......@@ -187,9 +198,9 @@ class StochasticGradientEstimatorsTest(tf.test.TestCase):
sf_y = y.loss(self._final_loss)
with self.test_session() as sess:
# Smoke test
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
sess.run([sf_x, sf_y])
if __name__ == "__main__":
tf.test.main()
test.main()
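Throughout this file the score-function (REINFORCE) surrogate is log p(x; theta) * stop_gradient(loss), whose gradient matches d/dtheta E[loss] in expectation. A hedged numpy check of that identity for a Bernoulli, independent of TensorFlow:

```
import numpy as np

rng = np.random.RandomState(0)
theta = 0.3
f = lambda x: np.where(x == 1, 3.2, 0.7)  # arbitrary per-outcome loss

# Analytic: d/dtheta E[f(X)], X ~ Bernoulli(theta), is f(1) - f(0) = 2.5.
x = (rng.rand(int(1e6)) < theta).astype(np.float64)
# Score: d log p(x; theta)/dtheta = x/theta - (1 - x)/(1 - theta).
score = x / theta - (1 - x) / (1 - theta)
estimate = np.mean(f(x) * score)
assert abs(estimate - 2.5) < 0.05
```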
......@@ -18,11 +18,20 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
st = tf.contrib.bayesflow.stochastic_tensor
sg = tf.contrib.bayesflow.stochastic_graph
distributions = tf.contrib.distributions
from tensorflow.contrib import distributions as distributions_lib
from tensorflow.contrib.bayesflow.python.ops import stochastic_graph
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
st = stochastic_tensor
sg = stochastic_graph
distributions = distributions_lib
class NormalNotParam(distributions.Normal):
......@@ -32,27 +41,28 @@ class NormalNotParam(distributions.Normal):
return False
class TestSurrogateLosses(tf.test.TestCase):
class TestSurrogateLosses(test.TestCase):
def testPathwiseDerivativeDoesNotAddSurrogateLosses(self):
with self.test_session():
mu = [0.0, 0.1, 0.2]
sigma = tf.constant([1.1, 1.2, 1.3])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.SampleValue()):
prior = st.StochasticTensor(distributions.Normal(mu=mu, sigma=sigma))
likelihood = st.StochasticTensor(
distributions.Normal(mu=prior, sigma=sigma))
distributions.Normal(
mu=prior, sigma=sigma))
self.assertTrue(prior.distribution.is_reparameterized)
self.assertTrue(likelihood.distribution.is_reparameterized)
loss = tf.square(tf.identity(likelihood) - [0.0, 0.1, 0.2])
sum_loss = tf.reduce_sum(loss)
loss = math_ops.square(array_ops.identity(likelihood) - [0.0, 0.1, 0.2])
sum_loss = math_ops.reduce_sum(loss)
surrogate_loss = sg.surrogate_loss([loss])
with self.assertRaisesRegexp(ValueError, "dimensionality 1 or greater"):
_ = sg.surrogate_loss([sum_loss])
surrogate_from_both = sg.surrogate_loss(
[loss, sum_loss * tf.ones_like(loss)])
[loss, sum_loss * array_ops.ones_like(loss)])
# Pathwise derivative terms do not require add'l surrogate loss terms.
with self.test_session() as sess:
......@@ -61,12 +71,12 @@ class TestSurrogateLosses(tf.test.TestCase):
def _testSurrogateLoss(self, session, losses, expected_addl_terms, xs):
surrogate_loss = sg.surrogate_loss(losses)
expected_surrogate_loss = tf.add_n(losses + expected_addl_terms)
expected_surrogate_loss = math_ops.add_n(losses + expected_addl_terms)
self.assertAllClose(*session.run([surrogate_loss, expected_surrogate_loss]))
# Test backprop
expected_grads = tf.gradients(ys=expected_surrogate_loss, xs=xs)
surrogate_grads = tf.gradients(ys=surrogate_loss, xs=xs)
expected_grads = gradients_impl.gradients(ys=expected_surrogate_loss, xs=xs)
surrogate_grads = gradients_impl.gradients(ys=surrogate_loss, xs=xs)
self.assertEqual(len(expected_grads), len(surrogate_grads))
grad_values = session.run(expected_grads + surrogate_grads)
n_grad = len(expected_grads)
......@@ -74,22 +84,22 @@ class TestSurrogateLosses(tf.test.TestCase):
def testSurrogateLoss(self):
with self.test_session() as sess:
mu = tf.constant([0.0, 0.1, 0.2])
sigma = tf.constant([1.1, 1.2, 1.3])
mu = constant_op.constant([0.0, 0.1, 0.2])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.SampleValue()):
prior = st.StochasticTensor(NormalNotParam(mu=mu, sigma=sigma))
likelihood = st.StochasticTensor(NormalNotParam(mu=prior, sigma=sigma))
prior_2 = st.StochasticTensor(NormalNotParam(mu=mu, sigma=sigma))
loss = tf.square(tf.identity(likelihood) - mu)
part_loss = tf.square(tf.identity(prior) - mu)
sum_loss = tf.reduce_sum(loss)
loss_nodeps = tf.square(tf.identity(prior_2) - mu)
loss = math_ops.square(array_ops.identity(likelihood) - mu)
part_loss = math_ops.square(array_ops.identity(prior) - mu)
sum_loss = math_ops.reduce_sum(loss)
loss_nodeps = math_ops.square(array_ops.identity(prior_2) - mu)
# For ground truth, use the stop-gradient versions of the losses
loss_nograd = tf.stop_gradient(loss)
loss_nodeps_nograd = tf.stop_gradient(loss_nodeps)
sum_loss_nograd = tf.stop_gradient(sum_loss)
loss_nograd = array_ops.stop_gradient(loss)
loss_nodeps_nograd = array_ops.stop_gradient(loss_nodeps)
sum_loss_nograd = array_ops.stop_gradient(sum_loss)
# These score functions should ignore prior_2
self._testSurrogateLoss(
......@@ -97,7 +107,8 @@ class TestSurrogateLosses(tf.test.TestCase):
losses=[loss],
expected_addl_terms=[
likelihood.distribution.log_pdf(likelihood.value()) * loss_nograd,
prior.distribution.log_pdf(prior.value()) * loss_nograd],
prior.distribution.log_pdf(prior.value()) * loss_nograd
],
xs=[mu, sigma])
self._testSurrogateLoss(
......@@ -105,35 +116,36 @@ class TestSurrogateLosses(tf.test.TestCase):
losses=[loss, part_loss],
expected_addl_terms=[
likelihood.distribution.log_pdf(likelihood.value()) * loss_nograd,
(prior.distribution.log_pdf(prior.value())
* tf.stop_gradient(part_loss + loss))],
(prior.distribution.log_pdf(prior.value()) *
array_ops.stop_gradient(part_loss + loss))
],
xs=[mu, sigma])
self._testSurrogateLoss(
session=sess,
losses=[sum_loss * tf.ones_like(loss)],
expected_addl_terms=[
(likelihood.distribution.log_pdf(likelihood.value())
* sum_loss_nograd),
prior.distribution.log_pdf(prior.value()) * sum_loss_nograd],
losses=[sum_loss * array_ops.ones_like(loss)],
expected_addl_terms=[(
likelihood.distribution.log_pdf(likelihood.value()) *
sum_loss_nograd), prior.distribution.log_pdf(prior.value()) *
sum_loss_nograd],
xs=[mu, sigma])
self._testSurrogateLoss(
session=sess,
losses=[loss, sum_loss * tf.ones_like(loss)],
expected_addl_terms=[
(likelihood.distribution.log_pdf(likelihood.value())
* tf.stop_gradient(loss + sum_loss)),
(prior.distribution.log_pdf(prior.value())
* tf.stop_gradient(loss + sum_loss))],
losses=[loss, sum_loss * array_ops.ones_like(loss)],
expected_addl_terms=[(
likelihood.distribution.log_pdf(likelihood.value()) *
array_ops.stop_gradient(loss + sum_loss)),
(prior.distribution.log_pdf(prior.value()) *
array_ops.stop_gradient(loss + sum_loss))],
xs=[mu, sigma])
# These score functions should ignore prior and likelihood
self._testSurrogateLoss(
session=sess,
losses=[loss_nodeps],
expected_addl_terms=[(prior_2.distribution.log_pdf(prior_2.value())
* loss_nodeps_nograd)],
expected_addl_terms=[(prior_2.distribution.log_pdf(prior_2.value()) *
loss_nodeps_nograd)],
xs=[mu, sigma])
# These score functions should include all terms selectively
......@@ -141,31 +153,32 @@ class TestSurrogateLosses(tf.test.TestCase):
session=sess,
losses=[loss, loss_nodeps],
# We can't guarantee ordering of output losses in this case.
expected_addl_terms=[
(likelihood.distribution.log_pdf(likelihood.value())
* loss_nograd),
prior.distribution.log_pdf(prior.value()) * loss_nograd,
(prior_2.distribution.log_pdf(prior_2.value())
* loss_nodeps_nograd)],
expected_addl_terms=[(
likelihood.distribution.log_pdf(likelihood.value()) *
loss_nograd), prior.distribution.log_pdf(prior.value()) *
loss_nograd,
(prior_2.distribution.log_pdf(prior_2.value()) *
loss_nodeps_nograd)],
xs=[mu, sigma])
def testNoSurrogateLoss(self):
with self.test_session():
mu = tf.constant([0.0, 0.1, 0.2])
sigma = tf.constant([1.1, 1.2, 1.3])
mu = constant_op.constant([0.0, 0.1, 0.2])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.SampleValue()):
dt = st.StochasticTensor(NormalNotParam(mu=mu, sigma=sigma),
loss_fn=None)
self.assertEqual(None, dt.loss(tf.constant([2.0])))
dt = st.StochasticTensor(
NormalNotParam(
mu=mu, sigma=sigma), loss_fn=None)
self.assertEqual(None, dt.loss(constant_op.constant([2.0])))
def testExplicitStochasticTensors(self):
with self.test_session() as sess:
mu = tf.constant([0.0, 0.1, 0.2])
sigma = tf.constant([1.1, 1.2, 1.3])
mu = constant_op.constant([0.0, 0.1, 0.2])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.SampleValue()):
dt1 = st.StochasticTensor(NormalNotParam(mu=mu, sigma=sigma))
dt2 = st.StochasticTensor(NormalNotParam(mu=mu, sigma=sigma))
loss = tf.square(tf.identity(dt1)) + 10. + dt2
loss = math_ops.square(array_ops.identity(dt1)) + 10. + dt2
sl_all = sg.surrogate_loss([loss])
sl_dt1 = sg.surrogate_loss([loss], stochastic_tensors=[dt1])
......@@ -180,7 +193,7 @@ class TestSurrogateLosses(tf.test.TestCase):
self.assertAllClose(*sess.run([sl_dt2, sum([loss, dt2_term])]))
class StochasticDependenciesMapTest(tf.test.TestCase):
class StochasticDependenciesMapTest(test.TestCase):
def testBuildsMapOfUpstreamNodes(self):
dt1 = st.StochasticTensor(distributions.Normal(mu=0., sigma=1.))
......@@ -211,10 +224,11 @@ class StochasticDependenciesMapTest(tf.test.TestCase):
dt2 = st.StochasticTensor(distributions.Bernoulli(logits=logits))
dt3 = st.StochasticTensor(distributions.Normal(mu=0., sigma=1.))
x = dt3.value()
y = tf.ones((2, 2)) * 4.
z = tf.ones((2, 2)) * 3.
out = tf.cond(
tf.cast(dt2, tf.bool), lambda: tf.add(x, y), lambda: tf.square(z))
y = array_ops.ones((2, 2)) * 4.
z = array_ops.ones((2, 2)) * 3.
out = control_flow_ops.cond(
math_ops.cast(dt2, dtypes.bool), lambda: math_ops.add(x, y),
lambda: math_ops.square(z))
out += 5.
dep_map = sg._stochastic_dependencies_map([out])
self.assertEqual(dep_map[dt1], set([out]))
......@@ -223,4 +237,4 @@ class StochasticDependenciesMapTest(tf.test.TestCase):
if __name__ == "__main__":
tf.test.main()
test.main()
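Spelled out, the property _testSurrogateLoss verifies: for each non-reparameterized StochasticTensor x_j with distribution q_j, sg.surrogate_loss adds one score-function term per tensor, so in the notation of the expected_addl_terms above

```
L_surrogate = \sum_i \ell_i
            + \sum_j \log q_j(x_j) \cdot \mathrm{stop\_gradient}\Big( \sum_{i \,:\, \ell_i \text{ depends on } x_j} \ell_i \Big)
```

Gradients flow through the log-probabilities while the loss values act as constants, and tensors no loss depends on (like prior_2 above) contribute nothing.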
......@@ -19,27 +19,35 @@ from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import distributions as distributions_lib
from tensorflow.contrib.bayesflow.python.ops import stochastic_gradient_estimators
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import test
distributions = tf.contrib.distributions
sge = tf.contrib.bayesflow.stochastic_gradient_estimators
st = tf.contrib.bayesflow.stochastic_tensor
distributions = distributions_lib
sge = stochastic_gradient_estimators
st = stochastic_tensor
class StochasticTensorTest(tf.test.TestCase):
class StochasticTensorTest(test.TestCase):
def testConstructionAndValue(self):
with self.test_session() as sess:
mu = [0.0, 0.1, 0.2]
sigma = tf.constant([1.1, 1.2, 1.3])
sigma2 = tf.constant([0.1, 0.2, 0.3])
sigma = constant_op.constant([1.1, 1.2, 1.3])
sigma2 = constant_op.constant([0.1, 0.2, 0.3])
prior_default = st.StochasticTensor(
distributions.Normal(mu=mu, sigma=sigma))
self.assertTrue(
isinstance(prior_default.value_type, st.SampleValue))
distributions.Normal(
mu=mu, sigma=sigma))
self.assertTrue(isinstance(prior_default.value_type, st.SampleValue))
prior_0 = st.StochasticTensor(
distributions.Normal(mu=mu, sigma=sigma),
distributions.Normal(
mu=mu, sigma=sigma),
dist_value_type=st.SampleValue())
self.assertTrue(isinstance(prior_0.value_type, st.SampleValue))
......@@ -47,18 +55,18 @@ class StochasticTensorTest(tf.test.TestCase):
prior = st.StochasticTensor(distributions.Normal(mu=mu, sigma=sigma))
self.assertTrue(isinstance(prior.value_type, st.SampleValue))
likelihood = st.StochasticTensor(
distributions.Normal(mu=prior, sigma=sigma2))
self.assertTrue(
isinstance(likelihood.value_type, st.SampleValue))
distributions.Normal(
mu=prior, sigma=sigma2))
self.assertTrue(isinstance(likelihood.value_type, st.SampleValue))
coll = tf.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
coll = ops.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
self.assertEqual(coll, [prior_default, prior_0, prior, likelihood])
# Also works: tf.convert_to_tensor(prior)
prior_default = tf.identity(prior_default)
prior_0 = tf.identity(prior_0)
prior = tf.identity(prior)
likelihood = tf.identity(likelihood)
prior_default = array_ops.identity(prior_default)
prior_0 = array_ops.identity(prior_0)
prior = array_ops.identity(prior)
likelihood = array_ops.identity(likelihood)
# Mostly a smoke test for now...
prior_0_val, prior_val, prior_default_val, _ = sess.run(
......@@ -74,7 +82,7 @@ class StochasticTensorTest(tf.test.TestCase):
def testMeanValue(self):
with self.test_session() as sess:
mu = [0.0, -1.0, 1.0]
sigma = tf.constant([1.1, 1.2, 1.3])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.MeanValue()):
prior = st.StochasticTensor(distributions.Normal(mu=mu, sigma=sigma))
......@@ -90,11 +98,12 @@ class StochasticTensorTest(tf.test.TestCase):
def testSampleValueScalar(self):
with self.test_session() as sess:
mu = [[0.0, -1.0, 1.0], [0.0, -1.0, 1.0]]
sigma = tf.constant([[1.1, 1.2, 1.3], [1.1, 1.2, 1.3]])
sigma = constant_op.constant([[1.1, 1.2, 1.3], [1.1, 1.2, 1.3]])
with st.value_type(st.SampleValue()):
prior_single = st.StochasticTensor(
distributions.Normal(mu=mu, sigma=sigma))
distributions.Normal(
mu=mu, sigma=sigma))
prior_single_value = prior_single.value()
self.assertEqual(prior_single_value.get_shape(), (2, 3))
......@@ -104,7 +113,8 @@ class StochasticTensorTest(tf.test.TestCase):
with st.value_type(st.SampleValue(1)):
prior_single = st.StochasticTensor(
distributions.Normal(mu=mu, sigma=sigma))
distributions.Normal(
mu=mu, sigma=sigma))
self.assertTrue(isinstance(prior_single.value_type, st.SampleValue))
prior_single_value = prior_single.value()
......@@ -115,7 +125,8 @@ class StochasticTensorTest(tf.test.TestCase):
with st.value_type(st.SampleValue(2)):
prior_double = st.StochasticTensor(
distributions.Normal(mu=mu, sigma=sigma))
distributions.Normal(
mu=mu, sigma=sigma))
prior_double_value = prior_double.value()
self.assertEqual(prior_double_value.get_shape(), (2, 2, 3))
......@@ -126,7 +137,7 @@ class StochasticTensorTest(tf.test.TestCase):
def testDistributionEntropy(self):
with self.test_session() as sess:
mu = [0.0, -1.0, 1.0]
sigma = tf.constant([1.1, 1.2, 1.3])
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.MeanValue()):
prior = st.StochasticTensor(distributions.Normal(mu=mu, sigma=sigma))
entropy = prior.entropy()
......@@ -140,29 +151,30 @@ class StochasticTensorTest(tf.test.TestCase):
def testSurrogateLoss(self):
with self.test_session():
mu = [[3.0, -4.0, 5.0], [6.0, -7.0, 8.0]]
sigma = tf.constant(1.0)
sigma = constant_op.constant(1.0)
# With default
with st.value_type(st.MeanValue(stop_gradient=True)):
dt = st.StochasticTensor(distributions.Normal(mu=mu, sigma=sigma))
loss = dt.loss([tf.constant(2.0)])
loss = dt.loss([constant_op.constant(2.0)])
self.assertTrue(loss is not None)
self.assertAllClose(
dt.distribution.log_prob(mu).eval() * 2.0, loss.eval())
# With passed-in loss_fn.
dt = st.StochasticTensor(
distributions.Normal(mu=mu, sigma=sigma),
distributions.Normal(
mu=mu, sigma=sigma),
dist_value_type=st.MeanValue(stop_gradient=True),
loss_fn=sge.get_score_function_with_constant_baseline(
baseline=tf.constant(8.0)))
loss = dt.loss([tf.constant(2.0)])
baseline=constant_op.constant(8.0)))
loss = dt.loss([constant_op.constant(2.0)])
self.assertTrue(loss is not None)
self.assertAllClose((dt.distribution.log_prob(mu) * (2.0 - 8.0)).eval(),
loss.eval())
class ValueTypeTest(tf.test.TestCase):
class ValueTypeTest(test.TestCase):
def testValueType(self):
type_mean = st.MeanValue()
......@@ -179,35 +191,38 @@ class ValueTypeTest(tf.test.TestCase):
st.get_current_value_type()
class ObservedStochasticTensorTest(tf.test.TestCase):
class ObservedStochasticTensorTest(test.TestCase):
def testConstructionAndValue(self):
with self.test_session() as sess:
mu = [0.0, 0.1, 0.2]
sigma = tf.constant([1.1, 1.2, 1.3])
obs = tf.zeros((2, 3))
sigma = constant_op.constant([1.1, 1.2, 1.3])
obs = array_ops.zeros((2, 3))
z = st.ObservedStochasticTensor(
distributions.Normal(mu=mu, sigma=sigma), value=obs)
distributions.Normal(
mu=mu, sigma=sigma), value=obs)
[obs_val, z_val] = sess.run([obs, z.value()])
self.assertAllEqual(obs_val, z_val)
coll = tf.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
coll = ops.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
self.assertEqual(coll, [z])
def testConstructionWithUnknownShapes(self):
mu = tf.placeholder(tf.float32)
sigma = tf.placeholder(tf.float32)
obs = tf.placeholder(tf.float32)
mu = array_ops.placeholder(dtypes.float32)
sigma = array_ops.placeholder(dtypes.float32)
obs = array_ops.placeholder(dtypes.float32)
z = st.ObservedStochasticTensor(
distributions.Normal(mu=mu, sigma=sigma), value=obs)
distributions.Normal(
mu=mu, sigma=sigma), value=obs)
mu2 = tf.placeholder(tf.float32, shape=[None])
sigma2 = tf.placeholder(tf.float32, shape=[None])
obs2 = tf.placeholder(tf.float32, shape=[None, None])
mu2 = array_ops.placeholder(dtypes.float32, shape=[None])
sigma2 = array_ops.placeholder(dtypes.float32, shape=[None])
obs2 = array_ops.placeholder(dtypes.float32, shape=[None, None])
z2 = st.ObservedStochasticTensor(
distributions.Normal(mu=mu2, sigma=sigma2), value=obs2)
distributions.Normal(
mu=mu2, sigma=sigma2), value=obs2)
coll = tf.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
coll = ops.get_collection(st.STOCHASTIC_TENSOR_COLLECTION)
self.assertEqual(coll, [z, z2])
def testConstructionErrors(self):
......@@ -216,21 +231,23 @@ class ObservedStochasticTensorTest(tf.test.TestCase):
self.assertRaises(
ValueError,
st.ObservedStochasticTensor,
distributions.Normal(mu=mu, sigma=sigma),
value=tf.zeros((3,)))
distributions.Normal(
mu=mu, sigma=sigma),
value=array_ops.zeros((3,)))
self.assertRaises(
ValueError,
st.ObservedStochasticTensor,
distributions.Normal(mu=mu, sigma=sigma),
value=tf.zeros((3, 1)))
distributions.Normal(
mu=mu, sigma=sigma),
value=array_ops.zeros((3, 1)))
self.assertRaises(
ValueError,
st.ObservedStochasticTensor,
distributions.Normal(mu=mu, sigma=sigma),
value=tf.zeros(
(1, 2), dtype=tf.int32))
distributions.Normal(
mu=mu, sigma=sigma),
value=array_ops.zeros(
(1, 2), dtype=dtypes.int32))
if __name__ == "__main__":
tf.test.main()
test.main()
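For orientation, the surface under test: a StochasticTensor wraps a Distribution, draws its value according to the active value type, behaves as a Tensor once passed through identity()/convert_to_tensor, and loss() builds its surrogate term. A minimal sketch mirroring testSurrogateLoss above, with the post-change imports:

```
from tensorflow.contrib import distributions
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor as st
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import array_ops

mu = [0.0, 0.1, 0.2]
sigma = constant_op.constant([1.1, 1.2, 1.3])
with st.value_type(st.MeanValue(stop_gradient=True)):
  dt = st.StochasticTensor(distributions.Normal(mu=mu, sigma=sigma))
value = array_ops.identity(dt)  # the mean, since MeanValue is active
loss = dt.loss([constant_op.constant(2.0)])  # log_prob(mu) * 2.0, as above
```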
......@@ -19,41 +19,52 @@ from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
sv = tf.contrib.bayesflow.stochastic_variables
st = tf.contrib.bayesflow.stochastic_tensor
vi = tf.contrib.bayesflow.variational_inference
dist = tf.contrib.distributions
class StochasticVariablesTest(tf.test.TestCase):
from tensorflow.contrib import distributions
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.contrib.bayesflow.python.ops import stochastic_variables
from tensorflow.contrib.bayesflow.python.ops import variational_inference
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
sv = stochastic_variables
st = stochastic_tensor
vi = variational_inference
dist = distributions
class StochasticVariablesTest(test.TestCase):
def testStochasticVariables(self):
shape = (10, 20)
with tf.variable_scope(
with variable_scope.variable_scope(
"stochastic_variables",
custom_getter=sv.make_stochastic_variable_getter(
dist_cls=dist.NormalWithSoftplusSigma)):
v = tf.get_variable("sv", shape)
v = variable_scope.get_variable("sv", shape)
self.assertTrue(isinstance(v, st.StochasticTensor))
self.assertTrue(isinstance(v.distribution, dist.NormalWithSoftplusSigma))
self.assertEqual(
{"stochastic_variables/sv_mu", "stochastic_variables/sv_sigma"},
set([v.op.name for v in tf.global_variables()]))
self.assertEqual(set(tf.trainable_variables()), set(tf.global_variables()))
set([v.op.name for v in variables.global_variables()]))
self.assertEqual(
set(variables.trainable_variables()), set(variables.global_variables()))
v = tf.convert_to_tensor(v)
v = ops.convert_to_tensor(v)
self.assertEqual(list(shape), v.get_shape().as_list())
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
self.assertEqual(shape, sess.run(v).shape)
def testStochasticVariablesWithConstantInitializer(self):
shape = (10, 20)
with tf.variable_scope(
with variable_scope.variable_scope(
"stochastic_variables",
custom_getter=sv.make_stochastic_variable_getter(
dist_cls=dist.NormalWithSoftplusSigma,
......@@ -62,17 +73,17 @@ class StochasticVariablesTest(tf.test.TestCase):
"mu": np.ones(shape) * 4.,
"sigma": np.ones(shape) * 2.
})):
v = tf.get_variable("sv")
v = variable_scope.get_variable("sv")
for var in tf.global_variables():
for var in variables.global_variables():
if "mu" in var.name:
mu_var = var
if "sigma" in var.name:
sigma_var = var
v = tf.convert_to_tensor(v)
v = ops.convert_to_tensor(v)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
self.assertAllEqual(np.ones(shape) * 4., sess.run(mu_var))
self.assertAllEqual(np.ones(shape) * 2., sess.run(sigma_var))
self.assertEqual(shape, sess.run(v).shape)
......@@ -82,9 +93,9 @@ class StochasticVariablesTest(tf.test.TestCase):
def sigma_init(shape, dtype, partition_info):
_ = partition_info
return tf.ones(shape, dtype=dtype) * 2.
return array_ops.ones(shape, dtype=dtype) * 2.
with tf.variable_scope(
with variable_scope.variable_scope(
"stochastic_variables",
custom_getter=sv.make_stochastic_variable_getter(
dist_cls=dist.NormalWithSoftplusSigma,
......@@ -94,17 +105,17 @@ class StochasticVariablesTest(tf.test.TestCase):
shape, dtype=np.float32) * 4.,
"sigma": sigma_init
})):
v = tf.get_variable("sv", shape)
v = variable_scope.get_variable("sv", shape)
for var in tf.global_variables():
for var in variables.global_variables():
if "mu" in var.name:
mu_var = var
if "sigma" in var.name:
sigma_var = var
v = tf.convert_to_tensor(v)
v = ops.convert_to_tensor(v)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
self.assertAllEqual(np.ones(shape) * 4., sess.run(mu_var))
self.assertAllEqual(np.ones(shape) * 2., sess.run(sigma_var))
self.assertEqual(shape, sess.run(v).shape)
......@@ -112,45 +123,46 @@ class StochasticVariablesTest(tf.test.TestCase):
def testStochasticVariablesWithPrior(self):
shape = (10, 20)
prior = dist.Normal(0., 1.)
with tf.variable_scope(
with variable_scope.variable_scope(
"stochastic_variables",
custom_getter=sv.make_stochastic_variable_getter(
dist_cls=dist.NormalWithSoftplusSigma, prior=prior)):
w = tf.get_variable("weights", shape)
w = variable_scope.get_variable("weights", shape)
x = tf.random_uniform((8, 10))
y = tf.matmul(x, w)
x = random_ops.random_uniform((8, 10))
y = math_ops.matmul(x, w)
prior_map = vi._find_variational_and_priors(y, None)
self.assertEqual(prior_map[w], prior)
elbo = vi.elbo(y, keep_batch_dim=False)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
sess.run(elbo)
def testStochasticVariablesWithCallablePriorInitializer(self):
def prior_init(shape, dtype):
return dist.Normal(tf.zeros(shape, dtype), tf.ones(shape, dtype))
return dist.Normal(
array_ops.zeros(shape, dtype), array_ops.ones(shape, dtype))
with tf.variable_scope(
with variable_scope.variable_scope(
"stochastic_variables",
custom_getter=sv.make_stochastic_variable_getter(
dist_cls=dist.NormalWithSoftplusSigma, prior=prior_init)):
w = tf.get_variable("weights", (10, 20))
w = variable_scope.get_variable("weights", (10, 20))
x = tf.random_uniform((8, 10))
y = tf.matmul(x, w)
x = random_ops.random_uniform((8, 10))
y = math_ops.matmul(x, w)
prior_map = vi._find_variational_and_priors(y, None)
self.assertTrue(isinstance(prior_map[w], dist.Normal))
elbo = vi.elbo(y, keep_batch_dim=False)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
sess.run(elbo)
if __name__ == "__main__":
tf.test.main()
test.main()
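make_stochastic_variable_getter plugs into variable_scope's custom_getter protocol: every get_variable call inside the scope is routed through the custom getter, which may create extra underlying variables (the *_mu / *_sigma pairs asserted above) and return something Tensor-like instead of a plain Variable. An illustrative sketch of the protocol itself, not of the contrib implementation:

```
from tensorflow.python.ops import variable_scope

def annotating_getter(getter, name, *args, **kwargs):
  # `getter` is the next getter in the chain; inspect, then delegate.
  print("intercepted get_variable for:", name)
  return getter(name, *args, **kwargs)

with variable_scope.variable_scope("scope", custom_getter=annotating_getter):
  w = variable_scope.get_variable("w", shape=(10, 20))
```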
......@@ -18,11 +18,28 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
st = tf.contrib.bayesflow.stochastic_tensor
vi = tf.contrib.bayesflow.variational_inference
distributions = tf.contrib.distributions
import sys
# TODO: #6568 Remove this hack that makes dlopen() not crash.
if hasattr(sys, "getdlopenflags") and hasattr(sys, "setdlopenflags"):
import ctypes
sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
from tensorflow.contrib import distributions as distributions_lib
from tensorflow.contrib import layers
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor
from tensorflow.contrib.bayesflow.python.ops import variational_inference
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.contrib.distributions.python.ops import normal
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
st = stochastic_tensor
vi = variational_inference
distributions = distributions_lib
class NormalNoEntropy(distributions.Normal):
......@@ -33,45 +50,46 @@ class NormalNoEntropy(distributions.Normal):
# For mini-VAE
def inference_net(x, latent_size):
return tf.contrib.layers.linear(x, latent_size)
return layers.linear(x, latent_size)
def generative_net(z, data_size):
return tf.contrib.layers.linear(z, data_size)
return layers.linear(z, data_size)
def mini_vae():
x = [[-6., 3., 6.], [-8., 4., 8.]]
prior = distributions.Normal(mu=0., sigma=1.)
variational = st.StochasticTensor(
distributions.Normal(mu=inference_net(x, 1), sigma=1.))
distributions.Normal(
mu=inference_net(x, 1), sigma=1.))
vi.register_prior(variational, prior)
px = distributions.Normal(mu=generative_net(variational, 3), sigma=1.)
log_likelihood = tf.reduce_sum(px.log_prob(x), 1)
log_likelihood = tf.expand_dims(log_likelihood, -1)
log_likelihood = math_ops.reduce_sum(px.log_prob(x), 1)
log_likelihood = array_ops.expand_dims(log_likelihood, -1)
return x, prior, variational, px, log_likelihood
class VariationalInferenceTest(tf.test.TestCase):
class VariationalInferenceTest(test.TestCase):
def testDefaultVariationalAndPrior(self):
_, prior, variational, _, log_likelihood = mini_vae()
elbo = vi.elbo(log_likelihood)
expected_elbo = log_likelihood - tf.contrib.distributions.kl(
expected_elbo = log_likelihood - kullback_leibler.kl(
variational.distribution, prior)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
self.assertAllEqual(*sess.run([expected_elbo, elbo]))
def testExplicitVariationalAndPrior(self):
with self.test_session() as sess:
_, _, variational, _, log_likelihood = mini_vae()
prior = tf.contrib.distributions.Normal(mu=3., sigma=2.)
prior = normal.Normal(mu=3., sigma=2.)
elbo = vi.elbo(
log_likelihood, variational_with_prior={variational: prior})
expected_elbo = log_likelihood - tf.contrib.distributions.kl(
expected_elbo = log_likelihood - kullback_leibler.kl(
variational.distribution, prior)
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
self.assertAllEqual(*sess.run([expected_elbo, elbo]))
def testExplicitForms(self):
......@@ -79,8 +97,9 @@ class VariationalInferenceTest(tf.test.TestCase):
elbos = []
forms = vi.ELBOForms
for form in [forms.default, forms.analytic_kl, forms.sample,
forms.analytic_entropy]:
for form in [
forms.default, forms.analytic_kl, forms.sample, forms.analytic_entropy
]:
elbo = vi.elbo(
log_likelihood=log_likelihood,
variational_with_prior={variational: prior},
......@@ -88,23 +107,24 @@ class VariationalInferenceTest(tf.test.TestCase):
elbos.append(elbo)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
log_likelihood_shape = tf.shape(log_likelihood).eval()
sess.run(variables.global_variables_initializer())
log_likelihood_shape = array_ops.shape(log_likelihood).eval()
for elbo in elbos:
elbo.eval()
elbo_shape = tf.shape(elbo).eval()
elbo_shape = array_ops.shape(elbo).eval()
self.assertAllEqual(log_likelihood_shape, elbo_shape)
self.assertEqual(elbo.dtype, log_likelihood.dtype)
def testDefaultsSampleKLWithoutAnalyticKLOrEntropy(self):
x = tf.constant([[-6., 3., 6.]])
x = constant_op.constant([[-6., 3., 6.]])
prior = distributions.Bernoulli(0.5)
variational = st.StochasticTensor(
NormalNoEntropy(mu=inference_net(x, 1), sigma=1.))
NormalNoEntropy(
mu=inference_net(x, 1), sigma=1.))
vi.register_prior(variational, prior)
px = distributions.Normal(mu=generative_net(variational, 3), sigma=1.)
log_likelihood = tf.reduce_sum(px.log_prob(x), 1)
log_likelihood = math_ops.reduce_sum(px.log_prob(x), 1)
# No analytic KL available between prior and variational distributions.
with self.assertRaisesRegexp(NotImplementedError, "No KL"):
......@@ -117,7 +137,7 @@ class VariationalInferenceTest(tf.test.TestCase):
variational) - variational.distribution.log_prob(variational)
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
self.assertAllEqual(*sess.run([expected_elbo, elbo]))
def testElboWithLogJoint(self):
......@@ -125,9 +145,9 @@ class VariationalInferenceTest(tf.test.TestCase):
_, prior, variational, _, log_likelihood = mini_vae()
log_joint = log_likelihood + prior.log_prob(variational)
elbo = vi.elbo_with_log_joint(log_joint)
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
elbo.eval()
if __name__ == "__main__":
tf.test.main()
test.main()
......@@ -47,8 +47,8 @@ import threading
import six
from tensorflow.contrib import distributions
from tensorflow.contrib.bayesflow.python.ops import stochastic_gradient_estimators as sge
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
......@@ -164,7 +164,7 @@ class SampleValue(_StochasticValueType):
sigma = tf.ones((2, 3))
with sg.value_type(sg.SampleValue()):
st = sg.StochasticTensor(
distributions.Normal, mu=mu, sigma=sigma)
tf.contrib.distributions.Normal, mu=mu, sigma=sigma)
# draws 1 sample and does not reshape
assertEqual(st.value().get_shape(), (2, 3))
```
......@@ -174,7 +174,7 @@ class SampleValue(_StochasticValueType):
sigma = tf.ones((2, 3))
with sg.value_type(sg.SampleValue(4)):
st = sg.StochasticTensor(
distributions.Normal, mu=mu, sigma=sigma)
tf.contrib.distributions.Normal, mu=mu, sigma=sigma)
# draws 4 samples each with shape (2, 3) and concatenates
assertEqual(st.value().get_shape(), (4, 2, 3))
```
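The shape semantics documented above can be mimicked with plain NumPy (illustrative only, not part of this file):

```
import numpy as np

mu = np.zeros((2, 3))
sigma = np.ones((2, 3))

one = np.random.normal(mu, sigma)                   # shape (2, 3), like SampleValue()
four = np.random.normal(mu, sigma, size=(4, 2, 3))  # shape (4, 2, 3), like SampleValue(4)
```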
......@@ -218,7 +218,8 @@ def value_type(dist_value_type):
```
with sg.value_type(sg.MeanValue(stop_gradients=True)):
st = sg.StochasticTensor(distributions.Normal, mu=mu, sigma=sigma)
st = sg.StochasticTensor(tf.contrib.distributions.Normal, mu=mu,
sigma=sigma)
```
In the example above, `st.value()` (or equivalently, `tf.identity(st)`) will
......@@ -311,7 +312,7 @@ class StochasticTensor(BaseStochasticTensor):
TypeError: if `dist` is not an instance of `Distribution`.
TypeError: if `loss_fn` is not `callable`.
"""
if not isinstance(dist, distributions.Distribution):
if not isinstance(dist, distribution.Distribution):
raise TypeError("dist must be an instance of Distribution")
if dist_value_type is None:
try:
......@@ -351,8 +352,8 @@ class StochasticTensor(BaseStochasticTensor):
elif isinstance(self._value_type, SampleValue):
value_tensor = self._dist.sample(self._value_type.shape)
else:
raise TypeError(
"Unrecognized Distribution Value Type: %s", self._value_type)
raise TypeError("Unrecognized Distribution Value Type: %s",
self._value_type)
if self._value_type.stop_gradient:
# stop_gradient is being enforced by the value type
......@@ -434,7 +435,7 @@ class ObservedStochasticTensor(StochasticTensor):
TypeError: if `dist` is not an instance of `Distribution`.
ValueError: if `value` is not compatible with the distribution.
"""
if not isinstance(dist, distributions.Distribution):
if not isinstance(dist, distribution.Distribution):
raise TypeError("dist must be an instance of Distribution")
with ops.name_scope(name, "ObservedStochasticTensor", [value]) as scope:
self._name = scope
......
......@@ -26,13 +26,12 @@ import functools
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor as st
from tensorflow.contrib.bayesflow.python.ops import variational_inference as vi
from tensorflow.contrib.distributions.python.ops import normal
def get_stochastic_variable(getter,
name,
shape=None,
dist_cls=normal.NormalWithSoftplusSigma,
dist_cls=None,
dist_kwargs=None,
param_initializers=None,
prior=None,
......
......@@ -27,9 +27,10 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib import distributions
from tensorflow.contrib.bayesflow.python.ops import stochastic_graph as sg
from tensorflow.contrib.bayesflow.python.ops import stochastic_tensor as st
from tensorflow.contrib.distributions.python.ops import distribution
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import tf_logging as logging
......@@ -57,7 +58,7 @@ def register_prior(variational, prior):
"""
if not isinstance(variational, st.StochasticTensor):
raise TypeError("variational must be a StochasticTensor")
if not isinstance(prior, distributions.Distribution):
if not isinstance(prior, distribution.Distribution):
raise TypeError("prior must be a Distribution")
ops.add_to_collection(VI_PRIORS, (variational, prior))
......@@ -84,8 +85,10 @@ class ELBOForms(object):
@staticmethod
def check_form(form):
if form not in {ELBOForms.default, ELBOForms.analytic_kl,
ELBOForms.analytic_entropy, ELBOForms.sample}:
if form not in {
ELBOForms.default, ELBOForms.analytic_kl, ELBOForms.analytic_entropy,
ELBOForms.sample
}:
raise TypeError("form must be an ELBOForms constant")
......@@ -257,7 +260,7 @@ def _elbo(form, log_likelihood, log_joint, variational_with_prior,
kl = None
if log_joint is None and form in {ELBOForms.default, ELBOForms.analytic_kl}:
try:
kl = distributions.kl(q, p)
kl = kullback_leibler.kl(q, p)
logging.info("Using analytic KL between q:%s, p:%s", q, p)
except NotImplementedError as e:
if form == ELBOForms.analytic_kl:
......@@ -316,8 +319,10 @@ def _find_variational_and_priors(model,
if not all(
[isinstance(q, st.StochasticTensor) for q in variational_with_prior]):
raise TypeError("variationals must be StochasticTensors")
if not all([p is None or isinstance(p, distributions.Distribution)
for p in variational_with_prior.values()]):
raise TypeError("priors must be Distributions")
if not all([
p is None or isinstance(p, distribution.Distribution)
for p in variational_with_prior.values()
]):
raise TypeError("priors must be Distribution objects")
return variational_with_prior
......@@ -29,10 +29,16 @@ py_test(
srcs_version = "PY2AND3",
deps = [
":copy_graph_py",
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/framework:framework_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:variables",
"//third_party/py/numpy",
],
)
......
......@@ -12,91 +12,93 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for contrib.copy_graph.python.util.copy."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib.copy_graph.python.util import copy_elements
from tensorflow.contrib.framework.python.framework import tensor_util
from tensorflow.python.client import session as session_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import test
graph1 = tf.Graph()
graph2 = tf.Graph()
graph1 = ops.Graph()
graph2 = ops.Graph()
class CopyVariablesTest(tf.test.TestCase):
class CopyVariablesTest(test.TestCase):
def testVariableCopy(self):
with graph1.as_default():
#Define a Variable in graph1
some_var = tf.Variable(2)
some_var = variables.Variable(2)
#Initialize session
sess1 = tf.Session()
sess1 = session_lib.Session()
#Initialize the Variable
tf.global_variables_initializer().run(session=sess1)
variables.global_variables_initializer().run(session=sess1)
#Make a copy of some_var in the default scope in graph2
copy1 = tf.contrib.copy_graph.copy_variable_to_graph(
some_var, graph2)
copy1 = copy_elements.copy_variable_to_graph(some_var, graph2)
#Make another copy with different scope
copy2 = tf.contrib.copy_graph.copy_variable_to_graph(
some_var, graph2, "test_scope")
copy2 = copy_elements.copy_variable_to_graph(some_var, graph2, "test_scope")
#Initialize both the copies
with graph2.as_default():
#Initialize Session
sess2 = tf.Session()
sess2 = session_lib.Session()
#Initialize the Variables
tf.global_variables_initializer().run(session=sess2)
variables.global_variables_initializer().run(session=sess2)
#Ensure values in all three variables are the same
v1 = some_var.eval(session=sess1)
v2 = copy1.eval(session=sess2)
v3 = copy2.eval(session=sess2)
assert isinstance(copy1, tf.Variable)
assert isinstance(copy2, tf.Variable)
assert isinstance(copy1, variables.Variable)
assert isinstance(copy2, variables.Variable)
assert v1 == v2 == v3 == 2
class CopyOpsTest(tf.test.TestCase):
class CopyOpsTest(test.TestCase):
def testOpsCopy(self):
with graph1.as_default():
#Initialize a basic expression y = ax + b
x = tf.placeholder("float")
a = tf.Variable(3.0)
b = tf.constant(4.0)
ax = tf.multiply(x, a)
y = tf.add(ax, b)
x = array_ops.placeholder("float")
a = variables.Variable(3.0)
b = constant_op.constant(4.0)
ax = math_ops.multiply(x, a)
y = math_ops.add(ax, b)
#Initialize session
sess1 = tf.Session()
sess1 = session_lib.Session()
#Initialize the Variable
tf.global_variables_initializer().run(session=sess1)
variables.global_variables_initializer().run(session=sess1)
#First, initialize a as a Variable in graph2
a1 = tf.contrib.copy_graph.copy_variable_to_graph(
a, graph2)
a1 = copy_elements.copy_variable_to_graph(a, graph2)
#Initialize a1 in graph2
with graph2.as_default():
#Initialize session
sess2 = tf.Session()
sess2 = session_lib.Session()
#Initialize the Variable
tf.global_variables_initializer().run(session=sess2)
variables.global_variables_initializer().run(session=sess2)
#Initialize a copy of y in graph2
y1 = tf.contrib.copy_graph.copy_op_to_graph(
y, graph2, [a1])
y1 = copy_elements.copy_op_to_graph(y, graph2, [a1])
#Now that y has been copied, x must be copied too.
#Get that instance
x1 = tf.contrib.copy_graph.get_copied_op(x, graph2)
x1 = copy_elements.get_copied_op(x, graph2)
#Compare values of y & y1 for a sample input
#and check if they match
......@@ -107,4 +109,4 @@ class CopyOpsTest(tf.test.TestCase):
if __name__ == "__main__":
tf.test.main()
test.main()
......@@ -21,6 +21,7 @@ py_library(
"//tensorflow/python:math_ops",
"//tensorflow/python:rnn",
"//tensorflow/python:variable_scope",
"//third_party/py/numpy",
],
)
......@@ -29,8 +30,12 @@ cuda_py_tests(
srcs = ["python/kernel_tests/crf_test.py"],
additional_deps = [
":crf_py",
"//tensorflow:tensorflow_py",
"//third_party/py/numpy",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
],
)
......
......@@ -19,12 +19,23 @@ from __future__ import division
from __future__ import print_function
import itertools
import sys
import numpy as np
import tensorflow as tf
# TODO: #6568 Remove this hack that makes dlopen() not crash.
if hasattr(sys, "getdlopenflags") and hasattr(sys, "setdlopenflags"):
import ctypes
sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
class CrfTest(tf.test.TestCase):
from tensorflow.contrib.crf.python.ops import crf
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
class CrfTest(test.TestCase):
def testCrfSequenceScore(self):
inputs = np.array(
......@@ -34,12 +45,12 @@ class CrfTest(tf.test.TestCase):
[[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
sequence_lengths = np.array(3, dtype=np.int32)
with self.test_session() as sess:
sequence_score = tf.contrib.crf.crf_sequence_score(
inputs=tf.expand_dims(inputs, 0),
tag_indices=tf.expand_dims(tag_indices, 0),
sequence_lengths=tf.expand_dims(sequence_lengths, 0),
transition_params=tf.constant(transition_params))
sequence_score = tf.squeeze(sequence_score, [0])
sequence_score = crf.crf_sequence_score(
inputs=array_ops.expand_dims(inputs, 0),
tag_indices=array_ops.expand_dims(tag_indices, 0),
sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
transition_params=constant_op.constant(transition_params))
sequence_score = array_ops.squeeze(sequence_score, [0])
tf_sequence_score = sess.run(sequence_score)
expected_unary_score = sum(inputs[i][tag_indices[i]]
for i in range(sequence_lengths))
......@@ -55,11 +66,11 @@ class CrfTest(tf.test.TestCase):
tag_indices = np.array([1, 2, 1, 0], dtype=np.int32)
sequence_lengths = np.array(3, dtype=np.int32)
with self.test_session() as sess:
unary_score = tf.contrib.crf.crf_unary_score(
tag_indices=tf.expand_dims(tag_indices, 0),
sequence_lengths=tf.expand_dims(sequence_lengths, 0),
inputs=tf.expand_dims(inputs, 0))
unary_score = tf.squeeze(unary_score, [0])
unary_score = crf.crf_unary_score(
tag_indices=array_ops.expand_dims(tag_indices, 0),
sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
inputs=array_ops.expand_dims(inputs, 0))
unary_score = array_ops.squeeze(unary_score, [0])
tf_unary_score = sess.run(unary_score)
expected_unary_score = sum(inputs[i][tag_indices[i]]
for i in range(sequence_lengths))
......@@ -71,11 +82,11 @@ class CrfTest(tf.test.TestCase):
[[-3, 5, -2], [3, 4, 1], [1, 2, 1]], dtype=np.float32)
sequence_lengths = np.array(3, dtype=np.int32)
with self.test_session() as sess:
binary_score = tf.contrib.crf.crf_binary_score(
tag_indices=tf.expand_dims(tag_indices, 0),
sequence_lengths=tf.expand_dims(sequence_lengths, 0),
transition_params=tf.constant(transition_params))
binary_score = tf.squeeze(binary_score, [0])
binary_score = crf.crf_binary_score(
tag_indices=array_ops.expand_dims(tag_indices, 0),
sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
transition_params=constant_op.constant(transition_params))
binary_score = array_ops.squeeze(binary_score, [0])
tf_binary_score = sess.run(binary_score)
expected_binary_score = sum(
transition_params[tag_indices[i], tag_indices[i + 1]]
......@@ -99,18 +110,18 @@ class CrfTest(tf.test.TestCase):
tag_indices = list(tag_indices)
tag_indices.extend([0] * (num_words - sequence_lengths))
all_sequence_scores.append(
tf.contrib.crf.crf_sequence_score(
inputs=tf.expand_dims(inputs, 0),
tag_indices=tf.expand_dims(tag_indices, 0),
sequence_lengths=tf.expand_dims(sequence_lengths, 0),
transition_params=tf.constant(transition_params)))
brute_force_log_norm = tf.reduce_logsumexp(all_sequence_scores)
log_norm = tf.contrib.crf.crf_log_norm(
inputs=tf.expand_dims(inputs, 0),
sequence_lengths=tf.expand_dims(sequence_lengths, 0),
transition_params=tf.constant(transition_params))
log_norm = tf.squeeze(log_norm, [0])
crf.crf_sequence_score(
inputs=array_ops.expand_dims(inputs, 0),
tag_indices=array_ops.expand_dims(tag_indices, 0),
sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
transition_params=constant_op.constant(transition_params)))
brute_force_log_norm = math_ops.reduce_logsumexp(all_sequence_scores)
log_norm = crf.crf_log_norm(
inputs=array_ops.expand_dims(inputs, 0),
sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
transition_params=constant_op.constant(transition_params))
log_norm = array_ops.squeeze(log_norm, [0])
tf_brute_force_log_norm, tf_log_norm = sess.run(
[brute_force_log_norm, log_norm])
......@@ -132,13 +143,14 @@ class CrfTest(tf.test.TestCase):
range(num_tags), repeat=sequence_lengths):
tag_indices = list(tag_indices)
tag_indices.extend([0] * (num_words - sequence_lengths))
sequence_log_likelihood, _ = tf.contrib.crf.crf_log_likelihood(
inputs=tf.expand_dims(inputs, 0),
tag_indices=tf.expand_dims(tag_indices, 0),
sequence_lengths=tf.expand_dims(sequence_lengths, 0),
transition_params=tf.constant(transition_params))
sequence_log_likelihood, _ = crf.crf_log_likelihood(
inputs=array_ops.expand_dims(inputs, 0),
tag_indices=array_ops.expand_dims(tag_indices, 0),
sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
transition_params=constant_op.constant(transition_params))
all_sequence_log_likelihoods.append(sequence_log_likelihood)
total_log_likelihood = tf.reduce_logsumexp(all_sequence_log_likelihoods)
total_log_likelihood = math_ops.reduce_logsumexp(
all_sequence_log_likelihoods)
tf_total_log_likelihood = sess.run(total_log_likelihood)
self.assertAllClose(tf_total_log_likelihood, 0.0)
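The brute-force check above enumerates every tag sequence; the same log-normalizer can be written directly in NumPy (shapes and scores are illustrative, not taken from the test data):

```
import itertools
import numpy as np
from scipy.special import logsumexp

inputs = np.random.rand(3, 2)       # (seq_length, num_tags) unary scores
transitions = np.random.rand(2, 2)  # (num_tags, num_tags) binary scores

def score(tags):
  unary = sum(inputs[i, t] for i, t in enumerate(tags))
  binary = sum(transitions[a, b] for a, b in zip(tags, tags[1:]))
  return unary + binary

log_norm = logsumexp(
    [score(tags) for tags in itertools.product(range(2), repeat=3)])
```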
......@@ -146,9 +158,7 @@ class CrfTest(tf.test.TestCase):
with self.test_session() as sess:
sequence_lengths = [4, 1, 8, 2]
max_sequence_length = max(sequence_lengths)
mask = tf.contrib.crf._lengths_to_masks(sequence_lengths,
max_sequence_length)
mask = crf._lengths_to_masks(sequence_lengths, max_sequence_length)
tf_mask = sess.run(mask)
self.assertEqual(len(tf_mask), len(sequence_lengths))
for m, l in zip(tf_mask, sequence_lengths):
......@@ -174,12 +184,12 @@ class CrfTest(tf.test.TestCase):
tag_indices = list(tag_indices)
tag_indices.extend([0] * (num_words - sequence_lengths))
all_sequences.append(tag_indices)
sequence_score = tf.contrib.crf.crf_sequence_score(
inputs=tf.expand_dims(inputs, 0),
tag_indices=tf.expand_dims(tag_indices, 0),
sequence_lengths=tf.expand_dims(sequence_lengths, 0),
transition_params=tf.constant(transition_params))
sequence_score = tf.squeeze(sequence_score, [0])
sequence_score = crf.crf_sequence_score(
inputs=array_ops.expand_dims(inputs, 0),
tag_indices=array_ops.expand_dims(tag_indices, 0),
sequence_lengths=array_ops.expand_dims(sequence_lengths, 0),
transition_params=constant_op.constant(transition_params))
sequence_score = array_ops.squeeze(sequence_score, [0])
all_sequence_scores.append(sequence_score)
tf_all_sequence_scores = sess.run(all_sequence_scores)
......@@ -188,7 +198,7 @@ class CrfTest(tf.test.TestCase):
expected_max_sequence = all_sequences[expected_max_sequence_index]
expected_max_score = tf_all_sequence_scores[expected_max_sequence_index]
actual_max_sequence, actual_max_score = tf.contrib.crf.viterbi_decode(
actual_max_sequence, actual_max_score = crf.viterbi_decode(
inputs[:sequence_lengths], transition_params)
self.assertAllClose(actual_max_score, expected_max_score)
......@@ -197,4 +207,4 @@ class CrfTest(tf.test.TestCase):
if __name__ == "__main__":
tf.test.main()
test.main()
......@@ -41,16 +41,17 @@ from __future__ import print_function
import numpy as np
from tensorflow.contrib import rnn as contrib_rnn
from tensorflow.contrib.rnn.python.ops import core_rnn_cell
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import rnn
from tensorflow.python.ops import variable_scope as vs
__all__ = ["crf_sequence_score", "crf_log_norm", "crf_log_likelihood",
"crf_unary_score", "crf_binary_score", "CrfForwardRnnCell",
"viterbi_decode"]
__all__ = [
"crf_sequence_score", "crf_log_norm", "crf_log_likelihood",
"crf_unary_score", "crf_binary_score", "CrfForwardRnnCell", "viterbi_decode"
]
def _lengths_to_masks(lengths, max_length):
......@@ -224,7 +225,7 @@ def crf_binary_score(tag_indices, sequence_lengths, transition_params):
return binary_scores
class CrfForwardRnnCell(contrib_rnn.RNNCell):
class CrfForwardRnnCell(core_rnn_cell.RNNCell):
"""Computes the alpha values in a linear-chain CRF.
See http://www.cs.columbia.edu/~mcollins/fb.pdf for reference.
......
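For reference, the alpha recursion this cell implements can be sketched in log space with NumPy (a simplified single-sequence version, not the cell's actual code); it agrees with brute-force enumeration while running in O(seq_length * num_tags**2) time:

```
import numpy as np
from scipy.special import logsumexp

def crf_log_norm(inputs, transitions):
  # inputs: (seq_length, num_tags) unary scores.
  # transitions: (num_tags, num_tags) binary scores.
  alphas = inputs[0]
  for unary in inputs[1:]:
    # alphas[j] = logsumexp_i(alphas[i] + transitions[i, j]) + unary[j]
    alphas = logsumexp(alphas[:, np.newaxis] + transitions, axis=0) + unary
  return logsumexp(alphas)
```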
......@@ -68,10 +68,17 @@ cuda_py_test(
srcs = ["python/kernel_tests/cudnn_rnn_ops_test.py"],
additional_deps = [
":cudnn_rnn_py",
"//tensorflow:tensorflow_py",
"//tensorflow/core:protos_all_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:math_ops",
"//tensorflow/python:platform_test",
"//tensorflow/python:random_ops",
"//tensorflow/python:state_ops",
"//tensorflow/python:training",
"//tensorflow/python:variables",
],
tags = [
......@@ -86,10 +93,18 @@ cuda_py_test(
srcs = ["python/kernel_tests/cudnn_rnn_ops_benchmark.py"],
additional_deps = [
":cudnn_rnn_py",
"//tensorflow:tensorflow_py",
"//tensorflow/contrib/rnn:rnn_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client",
"//tensorflow/python:client_testlib",
"//tensorflow/python:control_flow_ops",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:gradients",
"//tensorflow/python:init_ops",
"//tensorflow/python:platform",
"//tensorflow/python:platform_test",
"//tensorflow/python:variables",
],
tags = [
"manual",
......
......@@ -19,13 +19,26 @@ from __future__ import division
from __future__ import print_function
import time
import tensorflow as tf
tf.app.flags.DEFINE_integer("batch_size", 64, "batch size.")
FLAGS = tf.app.flags.FLAGS
class CudnnRNNBenchmark(tf.test.Benchmark):
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
from tensorflow.contrib.rnn.python.ops import core_rnn
from tensorflow.contrib.rnn.python.ops import core_rnn_cell_impl
from tensorflow.contrib.rnn.python.ops import lstm_ops
from tensorflow.python.client import session
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gradients_impl
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import flags
from tensorflow.python.platform import test
flags.DEFINE_integer("batch_size", 64, "batch size.")
FLAGS = flags.FLAGS
class CudnnRNNBenchmark(test.Benchmark):
"""Benchmarks Cudnn LSTM and other related models.
"""
......@@ -62,8 +75,8 @@ class CudnnRNNBenchmark(tf.test.Benchmark):
def _BenchmarkOp(self, op, desc):
burn_in_steps = 10
benchmark_steps = 40
with tf.Session() as sess:
sess.run(tf.global_variables_initializer())
with session.Session() as sess:
sess.run(variables.global_variables_initializer())
for i in xrange(burn_in_steps + benchmark_steps):
if i == burn_in_steps:
start_time = time.time()
......@@ -83,22 +96,27 @@ class CudnnRNNBenchmark(tf.test.Benchmark):
batch_size = config["batch_size"]
seq_length = config["seq_length"]
with tf.Graph().as_default(), tf.device("/gpu:0"):
model = tf.contrib.cudnn_rnn.CudnnLSTM(num_layers, num_units, num_units)
with ops.Graph().as_default(), ops.device("/gpu:0"):
model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, num_units)
params_size_t = model.params_size()
input_data = tf.Variable(tf.ones([seq_length, batch_size, num_units]))
input_h = tf.Variable(tf.ones([num_layers, batch_size, num_units]))
input_c = tf.Variable(tf.ones([num_layers, batch_size, num_units]))
params = tf.Variable(tf.ones([params_size_t]), validate_shape=False)
input_data = variables.Variable(
array_ops.ones([seq_length, batch_size, num_units]))
input_h = variables.Variable(
array_ops.ones([num_layers, batch_size, num_units]))
input_c = variables.Variable(
array_ops.ones([num_layers, batch_size, num_units]))
params = variables.Variable(
array_ops.ones([params_size_t]), validate_shape=False)
output, output_h, output_c = model(
is_training=True,
input_data=input_data,
input_h=input_h,
input_c=input_c,
params=params)
all_grads = tf.gradients([output, output_h, output_c],
[params, input_data, input_h, input_c])
training_op = tf.group(*all_grads)
all_grads = gradients_impl.gradients(
[output, output_h, output_c],
[params, input_data, input_h, input_c])
training_op = control_flow_ops.group(*all_grads)
self._BenchmarkOp(training_op, "cudnn_lstm %s %s" %
(config_name, self._GetConfigDesc(config)))
......@@ -110,19 +128,22 @@ class CudnnRNNBenchmark(tf.test.Benchmark):
batch_size = config["batch_size"]
seq_length = config["seq_length"]
with tf.Graph().as_default(), tf.device("/gpu:0"):
inputs = seq_length * [tf.zeros([batch_size, num_units], tf.float32)]
initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=127)
with ops.Graph().as_default(), ops.device("/gpu:0"):
inputs = seq_length * [
array_ops.zeros([batch_size, num_units], dtypes.float32)
]
initializer = init_ops.random_uniform_initializer(-0.01, 0.01, seed=127)
cell = tf.contrib.rnn.LSTMCell(
cell = core_rnn_cell_impl.LSTMCell(
num_units=num_units, initializer=initializer, state_is_tuple=True)
multi_cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)
outputs, final_state = tf.contrib.rnn.static_rnn(
multi_cell, inputs, dtype=tf.float32)
trainable_variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES)
gradients = tf.gradients([outputs, final_state], trainable_variables)
training_op = tf.group(*gradients)
multi_cell = core_rnn_cell_impl.MultiRNNCell([cell] * num_layers)
outputs, final_state = core_rnn.static_rnn(
multi_cell, inputs, dtype=dtypes.float32)
trainable_variables = ops.get_collection(
ops.GraphKeys.TRAINABLE_VARIABLES)
gradients = gradients_impl.gradients([outputs, final_state],
trainable_variables)
training_op = control_flow_ops.group(*gradients)
self._BenchmarkOp(training_op, "tf_rnn_lstm %s %s" %
(config_name, self._GetConfigDesc(config)))
......@@ -134,20 +155,22 @@ class CudnnRNNBenchmark(tf.test.Benchmark):
batch_size = config["batch_size"]
seq_length = config["seq_length"]
with tf.Graph().as_default(), tf.device("/gpu:0"):
inputs = seq_length * [tf.zeros([batch_size, num_units], tf.float32)]
cell = tf.contrib.rnn.python.ops.lstm_ops.LSTMBlockCell(
num_units=num_units)
multi_cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers)
outputs, final_state = tf.contrib.rnn.static_rnn(
multi_cell, inputs, dtype=tf.float32)
trainable_variables = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES)
gradients = tf.gradients([outputs, final_state], trainable_variables)
training_op = tf.group(*gradients)
with ops.Graph().as_default(), ops.device("/gpu:0"):
inputs = seq_length * [
array_ops.zeros([batch_size, num_units], dtypes.float32)
]
cell = lstm_ops.LSTMBlockCell(num_units=num_units)
multi_cell = core_rnn_cell_impl.MultiRNNCell([cell] * num_layers)
outputs, final_state = core_rnn.static_rnn(
multi_cell, inputs, dtype=dtypes.float32)
trainable_variables = ops.get_collection(
ops.GraphKeys.TRAINABLE_VARIABLES)
gradients = gradients_impl.gradients([outputs, final_state],
trainable_variables)
training_op = control_flow_ops.group(*gradients)
self._BenchmarkOp(training_op, "tf_rnn_lstm_block_cell %s %s" %
(config_name, self._GetConfigDesc(config)))
if __name__ == "__main__":
tf.test.main()
test.main()
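The burn-in pattern used by `_BenchmarkOp` above generalizes to any callable; a framework-free sketch (step counts mirror the benchmark, the workload is a stand-in):

```
import time

def benchmark(step_fn, burn_in_steps=10, benchmark_steps=40):
  for i in range(burn_in_steps + benchmark_steps):
    if i == burn_in_steps:
      start_time = time.time()  # discard warm-up iterations
    step_fn()
  return (time.time() - start_time) / benchmark_steps

step_time = benchmark(lambda: sum(x * x for x in range(10000)))
```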
......@@ -20,26 +20,33 @@ from __future__ import print_function
import os
import unittest
import tensorflow as tf
from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops
from tensorflow.core.protobuf import saver_pb2
from tensorflow.python.framework import ops
from tensorflow.python.framework import random_seed
from tensorflow.python.framework.test_util import TensorFlowTestCase
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import gradient_checker
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import state_ops
from tensorflow.python.ops import variables
from tensorflow.python.platform import googletest
from tensorflow.python.platform import test
from tensorflow.python.training import saver as saver_lib
class CudnnRNNTest(TensorFlowTestCase):
def _CreateModel(self, rnn_mode, num_layers, num_units, input_size):
if rnn_mode == "lstm":
model = tf.contrib.cudnn_rnn.CudnnLSTM(num_layers, num_units, input_size)
model = cudnn_rnn_ops.CudnnLSTM(num_layers, num_units, input_size)
elif rnn_mode == "gru":
model = tf.contrib.cudnn_rnn.CudnnGRU(num_layers, num_units, input_size)
model = cudnn_rnn_ops.CudnnGRU(num_layers, num_units, input_size)
elif rnn_mode == "rnn_tanh":
model = tf.contrib.cudnn_rnn.CudnnRNNTanh(num_layers, num_units,
input_size)
model = cudnn_rnn_ops.CudnnRNNTanh(num_layers, num_units, input_size)
elif rnn_mode == "rnn_relu":
model = tf.contrib.cudnn_rnn.CudnnRNNRelu(num_layers, num_units,
input_size)
model = cudnn_rnn_ops.CudnnRNNRelu(num_layers, num_units, input_size)
else:
raise ValueError("Invalid rnn_mode: %s" % rnn_mode)
return model
......@@ -51,26 +58,27 @@ class CudnnRNNTest(TensorFlowTestCase):
params: a Variable for weight and bias parameters.
model: a CudnnRNN model.
"""
params_saveable = tf.contrib.cudnn_rnn.RNNParamsSaveable(
model.params_to_canonical, model.canonical_to_params, params)
params_saveable = cudnn_rnn_ops.RNNParamsSaveable(model.params_to_canonical,
model.canonical_to_params,
params)
ops.add_to_collection(ops.GraphKeys.SAVEABLE_OBJECTS, params_saveable)
def _testSaveRestoreVariable(self, rnn_mode):
model = self._CreateModel(rnn_mode, num_layers=2, num_units=7, input_size=3)
tf.set_random_seed(1234)
random_seed.set_random_seed(1234)
params_size_t = model.params_size()
params = variables.Variable(
tf.random_uniform([params_size_t]), validate_shape=False)
random_ops.random_uniform([params_size_t]), validate_shape=False)
self._create_params_savable(params, model)
save_path = os.path.join(self.get_temp_dir(), "save-restore-variable-test")
saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)
with self.test_session(use_gpu=True) as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
params_v = sess.run(params)
val = saver.save(sess, save_path)
self.assertEqual(save_path, val)
with self.test_session(use_gpu=True) as sess:
reset_params = tf.assign(params, tf.zeros([params_size_t]))
reset_params = state_ops.assign(params, array_ops.zeros([params_size_t]))
sess.run(reset_params)
saver.restore(sess, save_path)
params_v_restored = sess.run(params)
......@@ -85,16 +93,17 @@ class CudnnRNNTest(TensorFlowTestCase):
dir_count = 1
model = self._CreateModel(rnn_mode, num_layers, num_units, input_size)
params_size_t = model.params_size()
params = variables.Variable(tf.ones([params_size_t]), validate_shape=False)
params = variables.Variable(
array_ops.ones([params_size_t]), validate_shape=False)
self._create_params_savable(params, model)
save_path = os.path.join(self.get_temp_dir(), "save-restore-output-test")
saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
saver = saver_lib.Saver(write_version=saver_pb2.SaverDef.V2)
has_input_c = (rnn_mode == "lstm")
input_data = tf.ones([seq_length, batch_size, input_size])
input_h = tf.ones([num_layers * dir_count, batch_size, num_units])
input_data = array_ops.ones([seq_length, batch_size, input_size])
input_h = array_ops.ones([num_layers * dir_count, batch_size, num_units])
if has_input_c:
input_c = tf.ones([num_layers * dir_count, batch_size, num_units])
input_c = array_ops.ones([num_layers * dir_count, batch_size, num_units])
outputs = model(
input_data=input_data,
input_h=input_h,
......@@ -107,20 +116,20 @@ class CudnnRNNTest(TensorFlowTestCase):
input_h=input_h,
params=params,
is_training=False)
total_sum = sum(map(tf.reduce_sum, outputs))
total_sum = sum(map(math_ops.reduce_sum, outputs))
with self.test_session(use_gpu=True) as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
total_sum_v = sess.run(total_sum)
val = saver.save(sess, save_path)
self.assertEqual(save_path, val)
with self.test_session(use_gpu=True) as sess:
reset_params = tf.assign(params, tf.zeros([params_size_t]))
reset_params = state_ops.assign(params, array_ops.zeros([params_size_t]))
sess.run(reset_params)
saver.restore(sess, save_path)
total_sum_v_restored = sess.run(total_sum)
self.assertAllEqual(total_sum_v, total_sum_v_restored)
@unittest.skipUnless(tf.test.is_built_with_cuda(),
@unittest.skipUnless(test.is_built_with_cuda(),
"Test only applicable when running on GPUs")
def testSaveRestore(self):
rnn_modes = ["lstm", "gru", "rnn_tanh", "rnn_relu"]
......@@ -150,7 +159,7 @@ class CudnnRNNTest(TensorFlowTestCase):
params_size_v = sess.run(params_size)
self.assertLessEqual(min_params_size, params_size_v)
@unittest.skipUnless(tf.test.is_built_with_cuda(),
@unittest.skipUnless(test.is_built_with_cuda(),
"Test only applicable when running on GPUs")
def testLSTMParamsSize(self):
test_configs = [
......@@ -161,7 +170,7 @@ class CudnnRNNTest(TensorFlowTestCase):
[2, 200, 100],
[3, 200, 400],
]
with tf.Graph().as_default():
with ops.Graph().as_default():
for (num_layers, num_units, input_size) in test_configs:
self._testOneLSTMParamsSize(num_layers, num_units, input_size)
......@@ -171,11 +180,12 @@ class CudnnRNNTest(TensorFlowTestCase):
model = self._CreateModel(rnn_mode, num_layers, num_units, input_size)
has_input_c = (rnn_mode == "lstm")
params_size_t = model.params_size()
input_data = tf.ones([seq_length, batch_size, input_size])
input_h = tf.ones([num_layers * dir_count, batch_size, num_units])
params = tf.Variable(tf.ones([params_size_t]), validate_shape=False)
input_data = array_ops.ones([seq_length, batch_size, input_size])
input_h = array_ops.ones([num_layers * dir_count, batch_size, num_units])
params = variables.Variable(
array_ops.ones([params_size_t]), validate_shape=False)
if has_input_c:
input_c = tf.ones([num_layers * dir_count, batch_size, num_units])
input_c = array_ops.ones([num_layers * dir_count, batch_size, num_units])
output, output_h, output_c = model(
input_data=input_data,
input_h=input_h,
......@@ -188,68 +198,76 @@ class CudnnRNNTest(TensorFlowTestCase):
input_h=input_h,
params=params,
is_training=False)
output_sum = tf.reduce_sum(output)
output_h_sum = tf.reduce_sum(output_h)
output_sum = math_ops.reduce_sum(output)
output_h_sum = math_ops.reduce_sum(output_h)
total_sum = output_sum + output_h_sum
if has_input_c:
output_c_sum = tf.reduce_sum(output_c)
output_c_sum = math_ops.reduce_sum(output_c)
total_sum += output_c_sum
with self.test_session(use_gpu=True) as sess:
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
total_sum_v = sess.run([total_sum])
self.assertAllClose(
total_sum_v[0], expected, atol=tolerance, rtol=tolerance)
@unittest.skipUnless(tf.test.is_built_with_cuda(),
@unittest.skipUnless(test.is_built_with_cuda(),
"Test only applicable when running on GPUs")
def testSimpleInference(self):
test_configs = [
["lstm",
231833.22,
1e-2,
{
"num_layers": 4,
"num_units": 200,
"input_size": 200,
"batch_size": 20,
"seq_length": 10,
"dir_count": 1,
},],
["gru",
56000,
1e-2,
{
"num_layers": 4,
"num_units": 200,
"input_size": 200,
"batch_size": 20,
"seq_length": 10,
"dir_count": 1,
},],
["rnn_tanh",
56000,
1e-2,
{
"num_layers": 4,
"num_units": 200,
"input_size": 200,
"batch_size": 20,
"seq_length": 10,
"dir_count": 1,
},],
["rnn_relu",
130688,
1e-2,
{
"num_layers": 2,
"num_units": 8,
"input_size": 4,
"batch_size": 4,
"seq_length": 2,
"dir_count": 1,
},],
[
"lstm",
231833.22,
1e-2,
{
"num_layers": 4,
"num_units": 200,
"input_size": 200,
"batch_size": 20,
"seq_length": 10,
"dir_count": 1,
},
],
[
"gru",
56000,
1e-2,
{
"num_layers": 4,
"num_units": 200,
"input_size": 200,
"batch_size": 20,
"seq_length": 10,
"dir_count": 1,
},
],
[
"rnn_tanh",
56000,
1e-2,
{
"num_layers": 4,
"num_units": 200,
"input_size": 200,
"batch_size": 20,
"seq_length": 10,
"dir_count": 1,
},
],
[
"rnn_relu",
130688,
1e-2,
{
"num_layers": 2,
"num_units": 8,
"input_size": 4,
"batch_size": 4,
"seq_length": 2,
"dir_count": 1,
},
],
]
with tf.Graph().as_default():
with ops.Graph().as_default():
for config in test_configs:
rnn_mode = config[0]
expected = config[1]
......@@ -263,18 +281,20 @@ class CudnnRNNTest(TensorFlowTestCase):
def _testOneSimpleTraining(self, rnn_mode, num_layers, num_units, input_size,
batch_size, seq_length, dir_count, tolerance):
has_input_c = (rnn_mode == "lstm")
tf.set_random_seed(1234)
random_seed.set_random_seed(1234)
model = self._CreateModel(rnn_mode, num_layers, num_units, input_size)
params_size_t = model.params_size()
input_data = tf.Variable(
tf.random_uniform([seq_length, batch_size, input_size]))
input_h = tf.Variable(
tf.random_uniform([num_layers * dir_count, batch_size, num_units]))
params = tf.Variable(
tf.random_uniform([params_size_t]), validate_shape=False)
input_data = variables.Variable(
random_ops.random_uniform([seq_length, batch_size, input_size]))
input_h = variables.Variable(
random_ops.random_uniform(
[num_layers * dir_count, batch_size, num_units]))
params = variables.Variable(
random_ops.random_uniform([params_size_t]), validate_shape=False)
if has_input_c:
input_c = tf.Variable(
tf.random_uniform([num_layers * dir_count, batch_size, num_units]))
input_c = variables.Variable(
random_ops.random_uniform(
[num_layers * dir_count, batch_size, num_units]))
output, output_h, output_c = model(
input_data=input_data,
input_h=input_h,
......@@ -283,11 +303,11 @@ class CudnnRNNTest(TensorFlowTestCase):
else:
output, output_h = model(
input_data=input_data, input_h=input_h, params=params)
output_sum = tf.reduce_sum(output)
output_h_sum = tf.reduce_sum(output_h)
output_sum = math_ops.reduce_sum(output)
output_h_sum = math_ops.reduce_sum(output_h)
total_sum = output_sum + output_h_sum
if has_input_c:
output_c_sum = tf.reduce_sum(output_c)
output_c_sum = math_ops.reduce_sum(output_c)
total_sum += output_c_sum
with self.test_session(use_gpu=True) as sess:
......@@ -300,59 +320,67 @@ class CudnnRNNTest(TensorFlowTestCase):
if has_input_c:
inputs_and_shapes.append(
(input_c, [num_layers * dir_count, batch_size, num_units]),)
sess.run(tf.global_variables_initializer())
sess.run(variables.global_variables_initializer())
all_inputs = [entry[0] for entry in inputs_and_shapes]
all_shapes = [entry[1] for entry in inputs_and_shapes]
err = tf.test.compute_gradient_error(all_inputs, all_shapes, total_sum,
[1])
err = gradient_checker.compute_gradient_error(all_inputs, all_shapes,
total_sum, [1])
self.assertLess(err, tolerance)
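`compute_gradient_error` compares analytic gradients against finite differences; in one dimension the underlying idea looks roughly like this (a sketch, not TensorFlow's implementation):

```
import numpy as np

def gradient_error(f, grad_f, x, eps=1e-4):
  # Central finite difference versus the provided analytic gradient.
  numeric = (f(x + eps) - f(x - eps)) / (2.0 * eps)
  return abs(numeric - grad_f(x))

err = gradient_error(np.sin, np.cos, 0.3)  # tiny for a smooth function
```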
@unittest.skipUnless(tf.test.is_built_with_cuda(),
@unittest.skipUnless(test.is_built_with_cuda(),
"Test only applicable when running on GPUs")
def testSimpleTraining(self):
test_configs = [
["lstm",
1e-2,
{
"num_layers": 2,
"num_units": 3,
"input_size": 4,
"batch_size": 3,
"seq_length": 4,
"dir_count": 1,
},],
["gru",
4e-3,
{
"num_layers": 2,
"num_units": 3,
"input_size": 4,
"batch_size": 3,
"seq_length": 4,
"dir_count": 1,
},],
["rnn_tanh",
5e-3,
{
"num_layers": 2,
"num_units": 3,
"input_size": 4,
"batch_size": 3,
"seq_length": 4,
"dir_count": 1,
},],
["rnn_relu",
3e-1,
{
"num_layers": 2,
"num_units": 3,
"input_size": 4,
"batch_size": 3,
"seq_length": 4,
"dir_count": 1,
},],
[
"lstm",
1e-2,
{
"num_layers": 2,
"num_units": 3,
"input_size": 4,
"batch_size": 3,
"seq_length": 4,
"dir_count": 1,
},
],
[
"gru",
4e-3,
{
"num_layers": 2,
"num_units": 3,
"input_size": 4,
"batch_size": 3,
"seq_length": 4,
"dir_count": 1,
},
],
[
"rnn_tanh",
5e-3,
{
"num_layers": 2,
"num_units": 3,
"input_size": 4,
"batch_size": 3,
"seq_length": 4,
"dir_count": 1,
},
],
[
"rnn_relu",
3e-1,
{
"num_layers": 2,
"num_units": 3,
"input_size": 4,
"batch_size": 3,
"seq_length": 4,
"dir_count": 1,
},
],
]
with tf.Graph().as_default():
with ops.Graph().as_default():
for config in test_configs:
rnn_mode = config[0]
tolerance = config[1]
......
......@@ -22,7 +22,10 @@ py_test(
srcs_version = "PY2AND3",
deps = [
":deprecated_py",
"//tensorflow:tensorflow_py",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:logging_ops",
],
)
......
......@@ -18,43 +18,46 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import logging_ops
from tensorflow.python.platform import test
class DeprecatedSummariesTest(tf.test.TestCase):
class DeprecatedSummariesTest(test.TestCase):
def testScalarSummary(self):
with self.test_session():
c = tf.constant(3)
s = tf.contrib.deprecated.scalar_summary('tag', c)
c = constant_op.constant(3)
s = logging_ops.scalar_summary('tag', c)
self.assertEqual(s.op.type, u'ScalarSummary')
def testHistogramSummary(self):
with self.test_session():
c = tf.constant(3)
s = tf.contrib.deprecated.histogram_summary('tag', c)
c = constant_op.constant(3)
s = logging_ops.histogram_summary('tag', c)
self.assertEqual(s.op.type, u'HistogramSummary')
def testImageSummary(self):
with self.test_session():
i = tf.ones((5, 4, 4, 3))
s = tf.contrib.deprecated.image_summary('tag', i)
i = array_ops.ones((5, 4, 4, 3))
s = logging_ops.image_summary('tag', i)
self.assertEqual(s.op.type, u'ImageSummary')
def testAudioSummary(self):
with self.test_session():
c = tf.constant(3.0)
s = tf.contrib.deprecated.audio_summary('tag', c, sample_rate=8000)
c = constant_op.constant(3.0)
s = logging_ops.audio_summary('tag', c, sample_rate=8000)
self.assertEqual(s.op.type, u'AudioSummaryV2')
def testMergeSummary(self):
with self.test_session():
c = tf.constant(3)
a = tf.contrib.deprecated.scalar_summary('a', c)
b = tf.contrib.deprecated.scalar_summary('b', c)
s = tf.contrib.deprecated.merge_summary([a, b])
c = constant_op.constant(3)
a = logging_ops.scalar_summary('a', c)
b = logging_ops.scalar_summary('b', c)
s = logging_ops.merge_summary([a, b])
self.assertEqual(s.op.type, u'MergeSummary')
if __name__ == '__main__':
tf.test.main()
test.main()
......@@ -20,13 +20,19 @@ from __future__ import print_function
import numpy as np
import scipy.special
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import bernoulli
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
def make_bernoulli(batch_shape, dtype=tf.int32):
def make_bernoulli(batch_shape, dtype=dtypes.int32):
p = np.random.uniform(size=list(batch_shape))
p = tf.constant(p, dtype=tf.float32)
return tf.contrib.distributions.Bernoulli(p=p, dtype=dtype)
p = constant_op.constant(p, dtype=dtypes.float32)
return bernoulli.Bernoulli(p=p, dtype=dtype)
def entropy(p):
......@@ -34,17 +40,17 @@ def entropy(p):
return -q * np.log(q) - p * np.log(p)
class BernoulliTest(tf.test.TestCase):
class BernoulliTest(test.TestCase):
def testP(self):
p = [0.2, 0.4]
dist = tf.contrib.distributions.Bernoulli(p=p)
dist = bernoulli.Bernoulli(p=p)
with self.test_session():
self.assertAllClose(p, dist.p.eval())
def testLogits(self):
logits = [-42., 42.]
dist = tf.contrib.distributions.Bernoulli(logits=logits)
dist = bernoulli.Bernoulli(logits=logits)
with self.test_session():
self.assertAllClose(logits, dist.logits.eval())
......@@ -52,7 +58,7 @@ class BernoulliTest(tf.test.TestCase):
self.assertAllClose(scipy.special.expit(logits), dist.p.eval())
p = [0.01, 0.99, 0.42]
dist = tf.contrib.distributions.Bernoulli(p=p)
dist = bernoulli.Bernoulli(p=p)
with self.test_session():
self.assertAllClose(scipy.special.logit(p), dist.logits.eval())
......@@ -61,20 +67,20 @@ class BernoulliTest(tf.test.TestCase):
for p in invalid_ps:
with self.test_session():
with self.assertRaisesOpError("p has components greater than 1"):
dist = tf.contrib.distributions.Bernoulli(p=p, validate_args=True)
dist = bernoulli.Bernoulli(p=p, validate_args=True)
dist.p.eval()
invalid_ps = [-0.01, -3.]
for p in invalid_ps:
with self.test_session():
with self.assertRaisesOpError("Condition x >= 0"):
dist = tf.contrib.distributions.Bernoulli(p=p, validate_args=True)
dist = bernoulli.Bernoulli(p=p, validate_args=True)
dist.p.eval()
valid_ps = [0.0, 0.5, 1.0]
for p in valid_ps:
with self.test_session():
dist = tf.contrib.distributions.Bernoulli(p=p)
dist = bernoulli.Bernoulli(p=p)
self.assertEqual(p, dist.p.eval()) # Should not fail
def testShapes(self):
......@@ -88,7 +94,7 @@ class BernoulliTest(tf.test.TestCase):
def testDtype(self):
dist = make_bernoulli([])
self.assertEqual(dist.dtype, tf.int32)
self.assertEqual(dist.dtype, dtypes.int32)
self.assertEqual(dist.dtype, dist.sample(5).dtype)
self.assertEqual(dist.dtype, dist.mode().dtype)
self.assertEqual(dist.p.dtype, dist.mean().dtype)
......@@ -98,13 +104,13 @@ class BernoulliTest(tf.test.TestCase):
self.assertEqual(dist.p.dtype, dist.pmf(0).dtype)
self.assertEqual(dist.p.dtype, dist.log_pmf(0).dtype)
dist64 = make_bernoulli([], tf.int64)
self.assertEqual(dist64.dtype, tf.int64)
dist64 = make_bernoulli([], dtypes.int64)
self.assertEqual(dist64.dtype, dtypes.int64)
self.assertEqual(dist64.dtype, dist64.sample(5).dtype)
self.assertEqual(dist64.dtype, dist64.mode().dtype)
def _testPmf(self, **kwargs):
dist = tf.contrib.distributions.Bernoulli(**kwargs)
dist = bernoulli.Bernoulli(**kwargs)
with self.test_session():
# pylint: disable=bad-continuation
xs = [
......@@ -129,14 +135,18 @@ class BernoulliTest(tf.test.TestCase):
def testPmfCorrectBroadcastDynamicShape(self):
with self.test_session():
p = tf.placeholder(dtype=tf.float32)
dist = tf.contrib.distributions.Bernoulli(p=p)
p = array_ops.placeholder(dtype=dtypes.float32)
dist = bernoulli.Bernoulli(p=p)
event1 = [1, 0, 1]
event2 = [[1, 0, 1]]
self.assertAllClose(dist.pmf(event1).eval({p: [0.2, 0.3, 0.4]}),
[0.2, 0.7, 0.4])
self.assertAllClose(dist.pmf(event2).eval({p: [0.2, 0.3, 0.4]}),
[[0.2, 0.7, 0.4]])
self.assertAllClose(
dist.pmf(event1).eval({
p: [0.2, 0.3, 0.4]
}), [0.2, 0.7, 0.4])
self.assertAllClose(
dist.pmf(event2).eval({
p: [0.2, 0.3, 0.4]
}), [[0.2, 0.7, 0.4]])
def testPmfWithP(self):
p = [[0.2, 0.4], [0.3, 0.6]]
......@@ -145,49 +155,53 @@ class BernoulliTest(tf.test.TestCase):
def testBroadcasting(self):
with self.test_session():
p = tf.placeholder(tf.float32)
dist = tf.contrib.distributions.Bernoulli(p=p)
p = array_ops.placeholder(dtypes.float32)
dist = bernoulli.Bernoulli(p=p)
self.assertAllClose(np.log(0.5), dist.log_pmf(1).eval({p: 0.5}))
self.assertAllClose(np.log([0.5, 0.5, 0.5]),
dist.log_pmf([1, 1, 1]).eval({p: 0.5}))
self.assertAllClose(np.log([0.5, 0.5, 0.5]),
dist.log_pmf(1).eval({p: [0.5, 0.5, 0.5]}))
self.assertAllClose(
np.log([0.5, 0.5, 0.5]), dist.log_pmf([1, 1, 1]).eval({
p: 0.5
}))
self.assertAllClose(
np.log([0.5, 0.5, 0.5]), dist.log_pmf(1).eval({
p: [0.5, 0.5, 0.5]
}))
def testPmfShapes(self):
with self.test_session():
p = tf.placeholder(tf.float32, shape=[None, 1])
dist = tf.contrib.distributions.Bernoulli(p=p)
p = array_ops.placeholder(dtypes.float32, shape=[None, 1])
dist = bernoulli.Bernoulli(p=p)
self.assertEqual(2, len(dist.log_pmf(1).eval({p: [[0.5], [0.5]]}).shape))
with self.test_session():
dist = tf.contrib.distributions.Bernoulli(p=0.5)
dist = bernoulli.Bernoulli(p=0.5)
self.assertEqual(2, len(dist.log_pmf([[1], [1]]).eval().shape))
with self.test_session():
dist = tf.contrib.distributions.Bernoulli(p=0.5)
dist = bernoulli.Bernoulli(p=0.5)
self.assertEqual((), dist.log_pmf(1).get_shape())
self.assertEqual((1), dist.log_pmf([1]).get_shape())
self.assertEqual((2, 1), dist.log_pmf([[1], [1]]).get_shape())
with self.test_session():
dist = tf.contrib.distributions.Bernoulli(p=[[0.5], [0.5]])
dist = bernoulli.Bernoulli(p=[[0.5], [0.5]])
self.assertEqual((2, 1), dist.log_pmf(1).get_shape())
def testBoundaryConditions(self):
with self.test_session():
dist = tf.contrib.distributions.Bernoulli(p=1.0)
dist = bernoulli.Bernoulli(p=1.0)
self.assertAllClose(np.nan, dist.log_pmf(0).eval())
self.assertAllClose([np.nan], [dist.log_pmf(1).eval()])
def testEntropyNoBatch(self):
p = 0.2
dist = tf.contrib.distributions.Bernoulli(p=p)
dist = bernoulli.Bernoulli(p=p)
with self.test_session():
self.assertAllClose(dist.entropy().eval(), entropy(p))
def testEntropyWithBatch(self):
p = [[0.1, 0.7], [0.2, 0.6]]
dist = tf.contrib.distributions.Bernoulli(p=p, validate_args=False)
dist = bernoulli.Bernoulli(p=p, validate_args=False)
with self.test_session():
self.assertAllClose(dist.entropy().eval(), [[entropy(0.1), entropy(0.7)],
[entropy(0.2), entropy(0.6)]])
......@@ -195,11 +209,11 @@ class BernoulliTest(tf.test.TestCase):
def testSampleN(self):
with self.test_session():
p = [0.2, 0.6]
dist = tf.contrib.distributions.Bernoulli(p=p)
dist = bernoulli.Bernoulli(p=p)
n = 100000
samples = dist.sample(n)
samples.set_shape([n, 2])
self.assertEqual(samples.dtype, tf.int32)
self.assertEqual(samples.dtype, dtypes.int32)
sample_values = samples.eval()
self.assertTrue(np.all(sample_values >= 0))
self.assertTrue(np.all(sample_values <= 1))
......@@ -210,48 +224,49 @@ class BernoulliTest(tf.test.TestCase):
self.assertEqual(set([0, 1]), set(sample_values.flatten()))
# In this test we're just interested in verifying there isn't a crash
# owing to mismatched types. b/30940152
dist = tf.contrib.distributions.Bernoulli(np.log([.2, .4]))
self.assertAllEqual(
(1, 2), dist.sample(1, seed=42).get_shape().as_list())
dist = bernoulli.Bernoulli(np.log([.2, .4]))
self.assertAllEqual((1, 2), dist.sample(1, seed=42).get_shape().as_list())
def testSampleActsLikeSampleN(self):
with self.test_session() as sess:
p = [0.2, 0.6]
dist = tf.contrib.distributions.Bernoulli(p=p)
dist = bernoulli.Bernoulli(p=p)
n = 1000
seed = 42
self.assertAllEqual(dist.sample(n, seed).eval(),
dist.sample(n, seed).eval())
n = tf.placeholder(tf.int32)
sample, sample = sess.run([dist.sample(n, seed),
dist.sample(n, seed)],
self.assertAllEqual(
dist.sample(n, seed).eval(), dist.sample(n, seed).eval())
n = array_ops.placeholder(dtypes.int32)
sample, sample = sess.run([dist.sample(n, seed), dist.sample(n, seed)],
feed_dict={n: 1000})
self.assertAllEqual(sample, sample)
def testMean(self):
with self.test_session():
p = np.array([[0.2, 0.7], [0.5, 0.4]], dtype=np.float32)
dist = tf.contrib.distributions.Bernoulli(p=p)
dist = bernoulli.Bernoulli(p=p)
self.assertAllEqual(dist.mean().eval(), p)
def testVarianceAndStd(self):
var = lambda p: p * (1. - p)
with self.test_session():
p = [[0.2, 0.7], [0.5, 0.4]]
dist = tf.contrib.distributions.Bernoulli(p=p)
self.assertAllClose(dist.variance().eval(),
np.array([[var(0.2), var(0.7)], [var(0.5), var(0.4)]],
dtype=np.float32))
self.assertAllClose(dist.std().eval(),
np.array([[np.sqrt(var(0.2)), np.sqrt(var(0.7))],
[np.sqrt(var(0.5)), np.sqrt(var(0.4))]],
dtype=np.float32))
dist = bernoulli.Bernoulli(p=p)
self.assertAllClose(
dist.variance().eval(),
np.array(
[[var(0.2), var(0.7)], [var(0.5), var(0.4)]], dtype=np.float32))
self.assertAllClose(
dist.std().eval(),
np.array(
[[np.sqrt(var(0.2)), np.sqrt(var(0.7))],
[np.sqrt(var(0.5)), np.sqrt(var(0.4))]],
dtype=np.float32))
def testBernoulliWithSigmoidP(self):
p = np.array([8.3, 4.2])
dist = tf.contrib.distributions.BernoulliWithSigmoidP(p=p)
dist = bernoulli.BernoulliWithSigmoidP(p=p)
with self.test_session():
self.assertAllClose(tf.nn.sigmoid(p).eval(), dist.p.eval())
self.assertAllClose(math_ops.sigmoid(p).eval(), dist.p.eval())
def testBernoulliBernoulliKL(self):
with self.test_session() as sess:
......@@ -259,19 +274,18 @@ class BernoulliTest(tf.test.TestCase):
a_p = np.array([0.5] * batch_size, dtype=np.float32)
b_p = np.array([0.4] * batch_size, dtype=np.float32)
a = tf.contrib.distributions.Bernoulli(p=a_p)
b = tf.contrib.distributions.Bernoulli(p=b_p)
a = bernoulli.Bernoulli(p=a_p)
b = bernoulli.Bernoulli(p=b_p)
kl = tf.contrib.distributions.kl(a, b)
kl = kullback_leibler.kl(a, b)
kl_val = sess.run(kl)
kl_expected = (
a_p * np.log(a_p / b_p) +
(1. - a_p) * np.log((1. - a_p) / (1. - b_p)))
kl_expected = (a_p * np.log(a_p / b_p) + (1. - a_p) * np.log(
(1. - a_p) / (1. - b_p)))
self.assertEqual(kl.get_shape(), (batch_size,))
self.assertAllClose(kl_val, kl_expected)
if __name__ == "__main__":
tf.test.main()
test.main()
......@@ -18,46 +18,56 @@ from __future__ import print_function
import numpy as np
from scipy import stats, special
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import beta as beta_lib
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.client import session
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import random_seed
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.platform import test
class BetaTest(tf.test.TestCase):
class BetaTest(test.TestCase):
def testSimpleShapes(self):
with self.test_session():
a = np.random.rand(3)
b = np.random.rand(3)
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
self.assertAllEqual([], dist.event_shape().eval())
self.assertAllEqual([3], dist.batch_shape().eval())
self.assertEqual(tf.TensorShape([]), dist.get_event_shape())
self.assertEqual(tf.TensorShape([3]), dist.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([]), dist.get_event_shape())
self.assertEqual(tensor_shape.TensorShape([3]), dist.get_batch_shape())
def testComplexShapes(self):
with self.test_session():
a = np.random.rand(3, 2, 2)
b = np.random.rand(3, 2, 2)
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
self.assertAllEqual([], dist.event_shape().eval())
self.assertAllEqual([3, 2, 2], dist.batch_shape().eval())
self.assertEqual(tf.TensorShape([]), dist.get_event_shape())
self.assertEqual(tf.TensorShape([3, 2, 2]), dist.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([]), dist.get_event_shape())
self.assertEqual(
tensor_shape.TensorShape([3, 2, 2]), dist.get_batch_shape())
def testComplexShapesBroadcast(self):
with self.test_session():
a = np.random.rand(3, 2, 2)
b = np.random.rand(2, 2)
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
self.assertAllEqual([], dist.event_shape().eval())
self.assertAllEqual([3, 2, 2], dist.batch_shape().eval())
self.assertEqual(tf.TensorShape([]), dist.get_event_shape())
self.assertEqual(tf.TensorShape([3, 2, 2]), dist.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([]), dist.get_event_shape())
self.assertEqual(
tensor_shape.TensorShape([3, 2, 2]), dist.get_batch_shape())
def testAlphaProperty(self):
a = [[1., 2, 3]]
b = [[2., 4, 3]]
with self.test_session():
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
self.assertEqual([1, 3], dist.a.get_shape())
self.assertAllClose(a, dist.a.eval())
......@@ -65,7 +75,7 @@ class BetaTest(tf.test.TestCase):
a = [[1., 2, 3]]
b = [[2., 4, 3]]
with self.test_session():
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
self.assertEqual([1, 3], dist.b.get_shape())
self.assertAllClose(b, dist.b.eval())
......@@ -73,7 +83,7 @@ class BetaTest(tf.test.TestCase):
a = [[1., 2, 3]]
b = [[2., 4, 3]]
with self.test_session():
dist = tf.contrib.distributions.Beta(a, b, validate_args=True)
dist = beta_lib.Beta(a, b, validate_args=True)
dist.pdf([.1, .3, .6]).eval()
dist.pdf([.2, .3, .5]).eval()
# Either condition can trigger.
......@@ -89,9 +99,9 @@ class BetaTest(tf.test.TestCase):
a = [1., 2]
b = [1., 2]
x = [.5, .5]
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
pdf = dist.pdf(x)
self.assertAllClose([1., 3./2], pdf.eval())
self.assertAllClose([1., 3. / 2], pdf.eval())
self.assertEqual((2,), pdf.get_shape())
def testPdfTwoBatchesNontrivialX(self):
......@@ -99,9 +109,9 @@ class BetaTest(tf.test.TestCase):
a = [1., 2]
b = [1., 2]
x = [.3, .7]
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
pdf = dist.pdf(x)
self.assertAllClose([1, 63./50], pdf.eval())
self.assertAllClose([1, 63. / 50], pdf.eval())
self.assertEqual((2,), pdf.get_shape())
def testPdfUniformZeroBatch(self):
......@@ -110,7 +120,7 @@ class BetaTest(tf.test.TestCase):
a = 1.
b = 1.
x = np.array([.1, .2, .3, .5, .8], dtype=np.float32)
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
pdf = dist.pdf(x)
self.assertAllClose([1.] * 5, pdf.eval())
self.assertEqual((5,), pdf.get_shape())
......@@ -120,9 +130,9 @@ class BetaTest(tf.test.TestCase):
a = [[1., 2]]
b = [[1., 2]]
x = [[.5, .5], [.3, .7]]
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
pdf = dist.pdf(x)
self.assertAllClose([[1., 3./2], [1., 63./50]], pdf.eval())
self.assertAllClose([[1., 3. / 2], [1., 63. / 50]], pdf.eval())
self.assertEqual((2, 2), pdf.get_shape())
def testPdfAlphaStretchedInBroadcastWhenLowerRank(self):
......@@ -130,8 +140,8 @@ class BetaTest(tf.test.TestCase):
a = [1., 2]
b = [1., 2]
x = [[.5, .5], [.2, .8]]
pdf = tf.contrib.distributions.Beta(a, b).pdf(x)
self.assertAllClose([[1., 3./2], [1., 24./25]], pdf.eval())
pdf = beta_lib.Beta(a, b).pdf(x)
self.assertAllClose([[1., 3. / 2], [1., 24. / 25]], pdf.eval())
self.assertEqual((2, 2), pdf.get_shape())
def testPdfXStretchedInBroadcastWhenSameRank(self):
......@@ -139,8 +149,8 @@ class BetaTest(tf.test.TestCase):
a = [[1., 2], [2., 3]]
b = [[1., 2], [2., 3]]
x = [[.5, .5]]
pdf = tf.contrib.distributions.Beta(a, b).pdf(x)
self.assertAllClose([[1., 3./2], [3./2, 15./8]], pdf.eval())
pdf = beta_lib.Beta(a, b).pdf(x)
self.assertAllClose([[1., 3. / 2], [3. / 2, 15. / 8]], pdf.eval())
self.assertEqual((2, 2), pdf.get_shape())
def testPdfXStretchedInBroadcastWhenLowerRank(self):
......@@ -148,77 +158,77 @@ class BetaTest(tf.test.TestCase):
a = [[1., 2], [2., 3]]
b = [[1., 2], [2., 3]]
x = [.5, .5]
pdf = tf.contrib.distributions.Beta(a, b).pdf(x)
self.assertAllClose([[1., 3./2], [3./2, 15./8]], pdf.eval())
pdf = beta_lib.Beta(a, b).pdf(x)
self.assertAllClose([[1., 3. / 2], [3. / 2, 15. / 8]], pdf.eval())
self.assertEqual((2, 2), pdf.get_shape())
def testBetaMean(self):
with tf.Session():
with session.Session():
a = [1., 2, 3]
b = [2., 4, 1.2]
expected_mean = stats.beta.mean(a, b)
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
self.assertEqual(dist.mean().get_shape(), (3,))
self.assertAllClose(expected_mean, dist.mean().eval())
def testBetaVariance(self):
with tf.Session():
with session.Session():
a = [1., 2, 3]
b = [2., 4, 1.2]
expected_variance = stats.beta.var(a, b)
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
self.assertEqual(dist.variance().get_shape(), (3,))
self.assertAllClose(expected_variance, dist.variance().eval())
def testBetaMode(self):
with tf.Session():
with session.Session():
a = np.array([1.1, 2, 3])
b = np.array([2., 4, 1.2])
expected_mode = (a - 1)/(a + b - 2)
dist = tf.contrib.distributions.Beta(a, b)
expected_mode = (a - 1) / (a + b - 2)
dist = beta_lib.Beta(a, b)
self.assertEqual(dist.mode().get_shape(), (3,))
self.assertAllClose(expected_mode, dist.mode().eval())
def testBetaModeInvalid(self):
with tf.Session():
with session.Session():
a = np.array([1., 2, 3])
b = np.array([2., 4, 1.2])
dist = tf.contrib.distributions.Beta(a, b, allow_nan_stats=False)
dist = beta_lib.Beta(a, b, allow_nan_stats=False)
with self.assertRaisesOpError("Condition x < y.*"):
dist.mode().eval()
a = np.array([2., 2, 3])
b = np.array([1., 4, 1.2])
dist = tf.contrib.distributions.Beta(a, b, allow_nan_stats=False)
dist = beta_lib.Beta(a, b, allow_nan_stats=False)
with self.assertRaisesOpError("Condition x < y.*"):
dist.mode().eval()
def testBetaModeEnableAllowNanStats(self):
with tf.Session():
with session.Session():
a = np.array([1., 2, 3])
b = np.array([2., 4, 1.2])
dist = tf.contrib.distributions.Beta(a, b, allow_nan_stats=True)
dist = beta_lib.Beta(a, b, allow_nan_stats=True)
expected_mode = (a - 1)/(a + b - 2)
expected_mode = (a - 1) / (a + b - 2)
expected_mode[0] = np.nan
self.assertEqual((3,), dist.mode().get_shape())
self.assertAllClose(expected_mode, dist.mode().eval())
a = np.array([2., 2, 3])
b = np.array([1., 4, 1.2])
dist = tf.contrib.distributions.Beta(a, b, allow_nan_stats=True)
dist = beta_lib.Beta(a, b, allow_nan_stats=True)
expected_mode = (a - 1)/(a + b - 2)
expected_mode = (a - 1) / (a + b - 2)
expected_mode[0] = np.nan
self.assertEqual((3,), dist.mode().get_shape())
self.assertAllClose(expected_mode, dist.mode().eval())
def testBetaEntropy(self):
with tf.Session():
with session.Session():
a = [1., 2, 3]
b = [2., 4, 1.2]
expected_entropy = stats.beta.entropy(a, b)
dist = tf.contrib.distributions.Beta(a, b)
dist = beta_lib.Beta(a, b)
self.assertEqual(dist.entropy().get_shape(), (3,))
self.assertAllClose(expected_entropy, dist.entropy().eval())
......@@ -226,8 +236,8 @@ class BetaTest(tf.test.TestCase):
with self.test_session():
a = 1.
b = 2.
beta = tf.contrib.distributions.Beta(a, b)
n = tf.constant(100000)
beta = beta_lib.Beta(a, b)
n = constant_op.constant(100000)
samples = beta.sample(n)
sample_values = samples.eval()
self.assertEqual(sample_values.shape, (100000,))
......@@ -235,15 +245,15 @@ class BetaTest(tf.test.TestCase):
self.assertLess(
stats.kstest(
# Beta is a univariate distribution.
sample_values, stats.beta(a=1., b=2.).cdf)[0],
sample_values,
stats.beta(
a=1., b=2.).cdf)[0],
0.01)
# The standard error of the sample mean is 1 / (sqrt(18 * n))
self.assertAllClose(sample_values.mean(axis=0),
stats.beta.mean(a, b),
atol=1e-2)
self.assertAllClose(np.cov(sample_values, rowvar=0),
stats.beta.var(a, b),
atol=1e-1)
self.assertAllClose(
sample_values.mean(axis=0), stats.beta.mean(a, b), atol=1e-2)
self.assertAllClose(
np.cov(sample_values, rowvar=0), stats.beta.var(a, b), atol=1e-1)
# Test that sampling with the same seed twice gives the same results.
def testBetaSampleMultipleTimes(self):
......@@ -252,12 +262,12 @@ class BetaTest(tf.test.TestCase):
b_val = 2.
n_val = 100
tf.set_random_seed(654321)
beta1 = tf.contrib.distributions.Beta(a=a_val, b=b_val, name="beta1")
random_seed.set_random_seed(654321)
beta1 = beta_lib.Beta(a=a_val, b=b_val, name="beta1")
samples1 = beta1.sample(n_val, seed=123456).eval()
tf.set_random_seed(654321)
beta2 = tf.contrib.distributions.Beta(a=a_val, b=b_val, name="beta2")
random_seed.set_random_seed(654321)
beta2 = beta_lib.Beta(a=a_val, b=b_val, name="beta2")
samples2 = beta2.sample(n_val, seed=123456).eval()
self.assertAllClose(samples1, samples2)
......@@ -266,8 +276,8 @@ class BetaTest(tf.test.TestCase):
with self.test_session():
a = np.random.rand(3, 2, 2).astype(np.float32)
b = np.random.rand(3, 2, 2).astype(np.float32)
beta = tf.contrib.distributions.Beta(a, b)
n = tf.constant(100000)
beta = beta_lib.Beta(a, b)
n = constant_op.constant(100000)
samples = beta.sample(n)
sample_values = samples.eval()
self.assertEqual(sample_values.shape, (100000, 3, 2, 2))
......@@ -284,7 +294,7 @@ class BetaTest(tf.test.TestCase):
a = 10. * np.random.random(shape).astype(dt)
b = 10. * np.random.random(shape).astype(dt)
x = np.random.random(shape).astype(dt)
actual = tf.contrib.distributions.Beta(a, b).cdf(x).eval()
actual = beta_lib.Beta(a, b).cdf(x).eval()
self.assertAllEqual(np.ones(shape, dtype=np.bool), 0. <= x)
self.assertAllEqual(np.ones(shape, dtype=np.bool), 1. >= x)
self.assertAllClose(stats.beta.cdf(x, a, b), actual, rtol=1e-4, atol=0)
......@@ -296,7 +306,7 @@ class BetaTest(tf.test.TestCase):
a = 10. * np.random.random(shape).astype(dt)
b = 10. * np.random.random(shape).astype(dt)
x = np.random.random(shape).astype(dt)
actual = tf.exp(tf.contrib.distributions.Beta(a, b).log_cdf(x)).eval()
actual = math_ops.exp(beta_lib.Beta(a, b).log_cdf(x)).eval()
self.assertAllEqual(np.ones(shape, dtype=np.bool), 0. <= x)
self.assertAllEqual(np.ones(shape, dtype=np.bool), 1. >= x)
self.assertAllClose(stats.beta.cdf(x, a, b), actual, rtol=1e-4, atol=0)
......@@ -304,44 +314,44 @@ class BetaTest(tf.test.TestCase):
def testBetaWithSoftplusAB(self):
with self.test_session():
a, b = -4.2, -9.1
dist = tf.contrib.distributions.BetaWithSoftplusAB(a, b)
self.assertAllClose(tf.nn.softplus(a).eval(), dist.a.eval())
self.assertAllClose(tf.nn.softplus(b).eval(), dist.b.eval())
dist = beta_lib.BetaWithSoftplusAB(a, b)
self.assertAllClose(nn_ops.softplus(a).eval(), dist.a.eval())
self.assertAllClose(nn_ops.softplus(b).eval(), dist.b.eval())
def testBetaBetaKL(self):
with self.test_session() as sess:
for shape in [(10,), (4,5)]:
a1 = 6.0*np.random.random(size=shape) + 1e-4
b1 = 6.0*np.random.random(size=shape) + 1e-4
a2 = 6.0*np.random.random(size=shape) + 1e-4
b2 = 6.0*np.random.random(size=shape) + 1e-4
for shape in [(10,), (4, 5)]:
a1 = 6.0 * np.random.random(size=shape) + 1e-4
b1 = 6.0 * np.random.random(size=shape) + 1e-4
a2 = 6.0 * np.random.random(size=shape) + 1e-4
b2 = 6.0 * np.random.random(size=shape) + 1e-4
# Take inverse softplus of values to test BetaWithSoftplusAB
a1_sp = np.log(np.exp(a1) - 1.0)
b1_sp = np.log(np.exp(b1) - 1.0)
a2_sp = np.log(np.exp(a2) - 1.0)
b2_sp = np.log(np.exp(b2) - 1.0)
d1 = tf.contrib.distributions.Beta(a=a1, b=b1)
d2 = tf.contrib.distributions.Beta(a=a2, b=b2)
d1_sp = tf.contrib.distributions.BetaWithSoftplusAB(a=a1_sp, b=b1_sp)
d2_sp = tf.contrib.distributions.BetaWithSoftplusAB(a=a2_sp, b=b2_sp)
d1 = beta_lib.Beta(a=a1, b=b1)
d2 = beta_lib.Beta(a=a2, b=b2)
d1_sp = beta_lib.BetaWithSoftplusAB(a=a1_sp, b=b1_sp)
d2_sp = beta_lib.BetaWithSoftplusAB(a=a2_sp, b=b2_sp)
kl_expected = (special.betaln(a2, b2) - special.betaln(a1, b1)
+ (a1 - a2)*special.digamma(a1)
+ (b1 - b2)*special.digamma(b1)
+ (a2 - a1 + b2 - b1)*special.digamma(a1 + b1))
kl_expected = (special.betaln(a2, b2) - special.betaln(a1, b1) +
(a1 - a2) * special.digamma(a1) +
(b1 - b2) * special.digamma(b1) +
(a2 - a1 + b2 - b1) * special.digamma(a1 + b1))
for dist1 in [d1, d1_sp]:
for dist2 in [d2, d2_sp]:
kl = tf.contrib.distributions.kl(dist1, dist2)
kl = kullback_leibler.kl(dist1, dist2)
kl_val = sess.run(kl)
self.assertEqual(kl.get_shape(), shape)
self.assertAllClose(kl_val, kl_expected)
# Make sure KL(d1||d1) is 0
kl_same = sess.run(tf.contrib.distributions.kl(d1, d1))
kl_same = sess.run(kullback_leibler.kl(d1, d1))
self.assertAllClose(kl_same, np.zeros_like(kl_expected))
if __name__ == "__main__":
tf.test.main()
test.main()
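The hunks above follow the pattern of the whole change: every reference that reaches through the top-level module ("hourglass" imports such as tf.contrib.distributions.Beta, tf.Session, tf.constant) is replaced with a direct import of the defining module. The import hunk for this file is folded out of the diff, so the sketch below is an inference from the aliases used in the rewritten lines (beta_lib, session, constant_op, random_seed, tensor_shape, math_ops, nn_ops, kullback_leibler, test), not the committed text:

  # Hypothetical before/after header for beta_test.py, reconstructed from
  # the aliases above; treat every line as an assumption.

  # Before: one hourglass import, everything reached via "tf."
  import tensorflow as tf

  # After: each symbol imported from the module that defines it.
  from tensorflow.contrib.distributions.python.ops import beta as beta_lib
  from tensorflow.contrib.distributions.python.ops import kullback_leibler
  from tensorflow.python.client import session
  from tensorflow.python.framework import constant_op
  from tensorflow.python.framework import random_seed
  from tensorflow.python.framework import tensor_shape
  from tensorflow.python.ops import math_ops
  from tensorflow.python.ops import nn_ops
  from tensorflow.python.platform import test

Rewritten this way, each test target can declare fine-grained BUILD dependencies instead of depending on all of TensorFlow.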
......@@ -18,42 +18,45 @@ from __future__ import print_function
import numpy as np
from scipy import stats
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import binomial
from tensorflow.python.framework import tensor_shape
from tensorflow.python.platform import test
class BinomialTest(tf.test.TestCase):
class BinomialTest(test.TestCase):
def testSimpleShapes(self):
with self.test_session():
p = np.float32(np.random.beta(1, 1))
binom = tf.contrib.distributions.Binomial(n=1., p=p)
binom = binomial.Binomial(n=1., p=p)
self.assertAllEqual([], binom.event_shape().eval())
self.assertAllEqual([], binom.batch_shape().eval())
self.assertEqual(tf.TensorShape([]), binom.get_event_shape())
self.assertEqual(tf.TensorShape([]), binom.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([]), binom.get_event_shape())
self.assertEqual(tensor_shape.TensorShape([]), binom.get_batch_shape())
def testComplexShapes(self):
with self.test_session():
p = np.random.beta(1, 1, size=(3, 2)).astype(np.float32)
n = [[3., 2], [4, 5], [6, 7]]
binom = tf.contrib.distributions.Binomial(n=n, p=p)
binom = binomial.Binomial(n=n, p=p)
self.assertAllEqual([], binom.event_shape().eval())
self.assertAllEqual([3, 2], binom.batch_shape().eval())
self.assertEqual(tf.TensorShape([]), binom.get_event_shape())
self.assertEqual(tf.TensorShape([3, 2]), binom.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([]), binom.get_event_shape())
self.assertEqual(
tensor_shape.TensorShape([3, 2]), binom.get_batch_shape())
def testNProperty(self):
p = [[0.1, 0.2, 0.7], [0.2, 0.3, 0.5]]
n = [[3.], [4]]
with self.test_session():
binom = tf.contrib.distributions.Binomial(n=n, p=p)
binom = binomial.Binomial(n=n, p=p)
self.assertEqual((2, 1), binom.n.get_shape())
self.assertAllClose(n, binom.n.eval())
def testPProperty(self):
p = [[0.1, 0.2, 0.7]]
with self.test_session():
binom = tf.contrib.distributions.Binomial(n=3., p=p)
binom = binomial.Binomial(n=3., p=p)
self.assertEqual((1, 3), binom.p.get_shape())
self.assertEqual((1, 3), binom.logits.get_shape())
self.assertAllClose(p, binom.p.eval())
......@@ -61,7 +64,7 @@ class BinomialTest(tf.test.TestCase):
def testLogitsProperty(self):
logits = [[0., 9., -0.5]]
with self.test_session():
binom = tf.contrib.distributions.Binomial(n=3., logits=logits)
binom = binomial.Binomial(n=3., logits=logits)
self.assertEqual((1, 3), binom.p.get_shape())
self.assertEqual((1, 3), binom.logits.get_shape())
self.assertAllClose(logits, binom.logits.eval())
......@@ -70,7 +73,7 @@ class BinomialTest(tf.test.TestCase):
p = [[0.1, 0.2, 0.7]]
n = [[5.]]
with self.test_session():
binom = tf.contrib.distributions.Binomial(n=n, p=p, validate_args=True)
binom = binomial.Binomial(n=n, p=p, validate_args=True)
binom.pmf([2., 3, 2]).eval()
binom.pmf([3., 1, 2]).eval()
with self.assertRaisesOpError("Condition x >= 0.*"):
......@@ -83,14 +86,14 @@ class BinomialTest(tf.test.TestCase):
n = [[5.]]
with self.test_session():
# No errors with integer n.
binom = tf.contrib.distributions.Binomial(n=n, p=p, validate_args=True)
binom = binomial.Binomial(n=n, p=p, validate_args=True)
binom.pmf([2., 3, 2]).eval()
binom.pmf([3., 1, 2]).eval()
# Both equality and integer checking fail.
with self.assertRaisesOpError("Condition x == y.*"):
binom.pmf([1.0, 2.5, 1.5]).eval()
binom = tf.contrib.distributions.Binomial(n=n, p=p, validate_args=False)
binom = binomial.Binomial(n=n, p=p, validate_args=False)
binom.pmf([1., 2., 3.]).eval()
# Non-integer arguments work.
binom.pmf([1.0, 2.5, 1.5]).eval()
......@@ -100,7 +103,7 @@ class BinomialTest(tf.test.TestCase):
# Both zero-batches. No broadcast
p = 0.5
counts = 1.
pmf = tf.contrib.distributions.Binomial(n=1., p=p).pmf(counts)
pmf = binomial.Binomial(n=1., p=p).pmf(counts)
self.assertAllClose(0.5, pmf.eval())
self.assertEqual((), pmf.get_shape())
......@@ -109,7 +112,7 @@ class BinomialTest(tf.test.TestCase):
# Both zero-batches. No broadcast
p = 0.1
counts = 3.
binom = tf.contrib.distributions.Binomial(n=5., p=p)
binom = binomial.Binomial(n=5., p=p)
pmf = binom.pmf(counts)
self.assertAllClose(stats.binom.pmf(counts, n=5., p=p), pmf.eval())
self.assertEqual((), pmf.get_shape())
......@@ -118,7 +121,7 @@ class BinomialTest(tf.test.TestCase):
with self.test_session():
p = [[0.1, 0.9]]
counts = [[1., 2.]]
pmf = tf.contrib.distributions.Binomial(n=3., p=p).pmf(counts)
pmf = binomial.Binomial(n=3., p=p).pmf(counts)
self.assertAllClose(stats.binom.pmf(counts, n=3., p=p), pmf.eval())
self.assertEqual((1, 2), pmf.get_shape())
......@@ -126,7 +129,7 @@ class BinomialTest(tf.test.TestCase):
with self.test_session():
p = [0.1, 0.4]
counts = [[1.], [0.]]
pmf = tf.contrib.distributions.Binomial(n=1., p=p).pmf(counts)
pmf = binomial.Binomial(n=1., p=p).pmf(counts)
self.assertAllClose([[0.1, 0.4], [0.9, 0.6]], pmf.eval())
self.assertEqual((2, 2), pmf.get_shape())
......@@ -134,7 +137,7 @@ class BinomialTest(tf.test.TestCase):
with self.test_session():
n = 5.
p = [0.1, 0.2, 0.7]
binom = tf.contrib.distributions.Binomial(n=n, p=p)
binom = binomial.Binomial(n=n, p=p)
expected_means = stats.binom.mean(n, p)
self.assertEqual((3,), binom.mean().get_shape())
self.assertAllClose(expected_means, binom.mean().eval())
......@@ -143,7 +146,7 @@ class BinomialTest(tf.test.TestCase):
with self.test_session():
n = 5.
p = [0.1, 0.2, 0.7]
binom = tf.contrib.distributions.Binomial(n=n, p=p)
binom = binomial.Binomial(n=n, p=p)
expected_variances = stats.binom.var(n, p)
self.assertEqual((3,), binom.variance().get_shape())
self.assertAllClose(expected_variances, binom.variance().eval())
......@@ -152,7 +155,7 @@ class BinomialTest(tf.test.TestCase):
with self.test_session():
n = 5.
p = [0.1, 0.2, 0.7]
binom = tf.contrib.distributions.Binomial(n=n, p=p)
binom = binomial.Binomial(n=n, p=p)
expected_modes = [0., 1, 4]
self.assertEqual((3,), binom.mode().get_shape())
self.assertAllClose(expected_modes, binom.mode().eval())
......@@ -161,7 +164,7 @@ class BinomialTest(tf.test.TestCase):
with self.test_session():
n = 9.
p = [0.1, 0.2, 0.7]
binom = tf.contrib.distributions.Binomial(n=n, p=p)
binom = binomial.Binomial(n=n, p=p)
# For the case where (n + 1) * p is an integer, the modes are:
# (n + 1) * p and (n + 1) * p - 1. In this case, we get back
# the larger of the two modes.
......@@ -169,5 +172,6 @@ class BinomialTest(tf.test.TestCase):
self.assertEqual((3,), binom.mode().get_shape())
self.assertAllClose(expected_modes, binom.mode().eval())
if __name__ == "__main__":
tf.test.main()
test.main()
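The comment in the last hunk notes that when (n + 1) * p is an integer the binomial pmf has two modes and the implementation returns the larger one. A standalone scipy check (not part of the diff) makes the bimodal case concrete:

  # For n = 9 and p = 0.5, (n + 1) * p = 5 is an integer, so 4 and 5 both
  # maximize the pmf; the larger mode, 5, is what the test would expect.
  import numpy as np
  from scipy import stats

  n, p = 9, 0.5
  pmf = stats.binom.pmf(np.arange(n + 1), n, p)
  assert np.isclose(pmf[4], pmf[5])      # two equally weighted modes
  assert np.isclose(pmf[5], pmf.max())   # both sit at the maximum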
......@@ -19,22 +19,29 @@ from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import categorical
from tensorflow.contrib.distributions.python.ops import kullback_leibler
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import test
def make_categorical(batch_shape, num_classes, dtype=tf.int32):
logits = tf.random_uniform(
list(batch_shape) + [num_classes], -10, 10, dtype=tf.float32) - 50.
return tf.contrib.distributions.Categorical(logits, dtype=dtype)
def make_categorical(batch_shape, num_classes, dtype=dtypes.int32):
logits = random_ops.random_uniform(
list(batch_shape) + [num_classes], -10, 10, dtype=dtypes.float32) - 50.
return categorical.Categorical(logits, dtype=dtype)
class CategoricalTest(tf.test.TestCase):
class CategoricalTest(test.TestCase):
def testP(self):
p = [0.2, 0.8]
dist = tf.contrib.distributions.Categorical(p=p)
dist = categorical.Categorical(p=p)
with self.test_session():
self.assertAllClose(p, dist.p.eval())
self.assertAllEqual([2], dist.logits.get_shape())
......@@ -42,7 +49,7 @@ class CategoricalTest(tf.test.TestCase):
def testLogits(self):
p = np.array([0.2, 0.8], dtype=np.float32)
logits = np.log(p) - 50.
dist = tf.contrib.distributions.Categorical(logits=logits)
dist = categorical.Categorical(logits=logits)
with self.test_session():
self.assertAllEqual([2], dist.p.get_shape())
self.assertAllEqual([2], dist.logits.get_shape())
......@@ -63,7 +70,9 @@ class CategoricalTest(tf.test.TestCase):
self.assertEqual(10, tensor_util.constant_value(dist.num_classes))
for batch_shape in ([], [1], [2, 3, 4]):
dist = make_categorical(batch_shape, tf.constant(10, dtype=tf.int32))
dist = make_categorical(
batch_shape, constant_op.constant(
10, dtype=dtypes.int32))
self.assertAllEqual(len(batch_shape), dist.get_batch_shape().ndims)
self.assertAllEqual(batch_shape, dist.batch_shape().eval())
self.assertAllEqual([], dist.get_event_shape())
......@@ -71,26 +80,28 @@ class CategoricalTest(tf.test.TestCase):
self.assertEqual(10, dist.num_classes.eval())
def testDtype(self):
dist = make_categorical([], 5, dtype=tf.int32)
self.assertEqual(dist.dtype, tf.int32)
dist = make_categorical([], 5, dtype=dtypes.int32)
self.assertEqual(dist.dtype, dtypes.int32)
self.assertEqual(dist.dtype, dist.sample(5).dtype)
self.assertEqual(dist.dtype, dist.mode().dtype)
dist = make_categorical([], 5, dtype=tf.int64)
self.assertEqual(dist.dtype, tf.int64)
dist = make_categorical([], 5, dtype=dtypes.int64)
self.assertEqual(dist.dtype, dtypes.int64)
self.assertEqual(dist.dtype, dist.sample(5).dtype)
self.assertEqual(dist.dtype, dist.mode().dtype)
self.assertEqual(dist.p.dtype, tf.float32)
self.assertEqual(dist.logits.dtype, tf.float32)
self.assertEqual(dist.p.dtype, dtypes.float32)
self.assertEqual(dist.logits.dtype, dtypes.float32)
self.assertEqual(dist.logits.dtype, dist.entropy().dtype)
self.assertEqual(dist.logits.dtype, dist.pmf(
np.array(0, dtype=np.int64)).dtype)
self.assertEqual(dist.logits.dtype, dist.log_pmf(
np.array(0, dtype=np.int64)).dtype)
self.assertEqual(
dist.logits.dtype, dist.pmf(np.array(
0, dtype=np.int64)).dtype)
self.assertEqual(
dist.logits.dtype, dist.log_pmf(np.array(
0, dtype=np.int64)).dtype)
def testUnknownShape(self):
with self.test_session():
logits = tf.placeholder(dtype=tf.float32)
dist = tf.contrib.distributions.Categorical(logits)
logits = array_ops.placeholder(dtype=dtypes.float32)
dist = categorical.Categorical(logits)
sample = dist.sample()
# Will sample class 1.
sample_value = sample.eval(feed_dict={logits: [-1000.0, 1000.0]})
......@@ -103,70 +114,72 @@ class CategoricalTest(tf.test.TestCase):
def testPMFWithBatch(self):
histograms = [[0.2, 0.8], [0.6, 0.4]]
dist = tf.contrib.distributions.Categorical(tf.log(histograms) - 50.)
dist = categorical.Categorical(math_ops.log(histograms) - 50.)
with self.test_session():
self.assertAllClose(dist.pmf([0, 1]).eval(), [0.2, 0.4])
def testPMFNoBatch(self):
histograms = [0.2, 0.8]
dist = tf.contrib.distributions.Categorical(tf.log(histograms) - 50.)
dist = categorical.Categorical(math_ops.log(histograms) - 50.)
with self.test_session():
self.assertAllClose(dist.pmf(0).eval(), 0.2)
def testLogPMF(self):
logits = np.log([[0.2, 0.8], [0.6, 0.4]]) - 50.
dist = tf.contrib.distributions.Categorical(logits)
dist = categorical.Categorical(logits)
with self.test_session():
self.assertAllClose(dist.log_pmf([0, 1]).eval(), np.log([0.2, 0.4]))
def testEntropyNoBatch(self):
logits = np.log([0.2, 0.8]) - 50.
dist = tf.contrib.distributions.Categorical(logits)
dist = categorical.Categorical(logits)
with self.test_session():
self.assertAllClose(
dist.entropy().eval(),
-(0.2 * np.log(0.2) + 0.8 * np.log(0.8)))
self.assertAllClose(dist.entropy().eval(),
-(0.2 * np.log(0.2) + 0.8 * np.log(0.8)))
def testEntropyWithBatch(self):
logits = np.log([[0.2, 0.8], [0.6, 0.4]]) - 50.
dist = tf.contrib.distributions.Categorical(logits)
dist = categorical.Categorical(logits)
with self.test_session():
self.assertAllClose(dist.entropy().eval(),
[-(0.2 * np.log(0.2) + 0.8 * np.log(0.8)),
-(0.6 * np.log(0.6) + 0.4 * np.log(0.4))])
self.assertAllClose(dist.entropy().eval(), [
-(0.2 * np.log(0.2) + 0.8 * np.log(0.8)),
-(0.6 * np.log(0.6) + 0.4 * np.log(0.4))
])
def testSample(self):
with self.test_session():
histograms = [[[0.2, 0.8], [0.4, 0.6]]]
dist = tf.contrib.distributions.Categorical(tf.log(histograms) - 50.)
dist = categorical.Categorical(math_ops.log(histograms) - 50.)
n = 10000
samples = dist.sample(n, seed=123)
samples.set_shape([n, 1, 2])
self.assertEqual(samples.dtype, tf.int32)
self.assertEqual(samples.dtype, dtypes.int32)
sample_values = samples.eval()
self.assertFalse(np.any(sample_values < 0))
self.assertFalse(np.any(sample_values > 1))
self.assertAllClose(
[[0.2, 0.4]], np.mean(sample_values == 0, axis=0), atol=1e-2)
[[0.2, 0.4]], np.mean(
sample_values == 0, axis=0), atol=1e-2)
self.assertAllClose(
[[0.8, 0.6]], np.mean(sample_values == 1, axis=0), atol=1e-2)
[[0.8, 0.6]], np.mean(
sample_values == 1, axis=0), atol=1e-2)
def testSampleWithSampleShape(self):
with self.test_session():
histograms = [[[0.2, 0.8], [0.4, 0.6]]]
dist = tf.contrib.distributions.Categorical(tf.log(histograms) - 50.)
dist = categorical.Categorical(math_ops.log(histograms) - 50.)
samples = dist.sample((100, 100), seed=123)
prob = dist.prob(samples)
prob_val = prob.eval()
self.assertAllClose([0.2**2 + 0.8**2], [prob_val[:, :, :, 0].mean()],
atol=1e-2)
self.assertAllClose([0.4**2 + 0.6**2], [prob_val[:, :, :, 1].mean()],
atol=1e-2)
self.assertAllClose(
[0.2**2 + 0.8**2], [prob_val[:, :, :, 0].mean()], atol=1e-2)
self.assertAllClose(
[0.4**2 + 0.6**2], [prob_val[:, :, :, 1].mean()], atol=1e-2)
def testLogPMFBroadcasting(self):
with self.test_session():
histograms = [[[0.2, 0.8], [0.4, 0.6]]]
dist = tf.contrib.distributions.Categorical(tf.log(histograms) - 50.)
dist = categorical.Categorical(math_ops.log(histograms) - 50.)
prob = dist.prob(1)
self.assertAllClose([[0.8, 0.6]], prob.eval())
......@@ -194,7 +207,7 @@ class CategoricalTest(tf.test.TestCase):
with self.test_session():
# shape [1, 2, 2]
histograms = [[[0.2, 0.8], [0.4, 0.6]]]
dist = tf.contrib.distributions.Categorical(tf.log(histograms))
dist = categorical.Categorical(math_ops.log(histograms))
log_prob = dist.log_prob([0, 1])
self.assertEqual(2, log_prob.get_shape().ndims)
......@@ -206,7 +219,7 @@ class CategoricalTest(tf.test.TestCase):
def testLogPMFShapeNoBatch(self):
histograms = [0.2, 0.8]
dist = tf.contrib.distributions.Categorical(tf.log(histograms))
dist = categorical.Categorical(math_ops.log(histograms))
log_prob = dist.log_prob(0)
self.assertEqual(0, log_prob.get_shape().ndims)
......@@ -219,10 +232,11 @@ class CategoricalTest(tf.test.TestCase):
def testMode(self):
with self.test_session():
histograms = [[[0.2, 0.8], [0.6, 0.4]]]
dist = tf.contrib.distributions.Categorical(tf.log(histograms) - 50.)
dist = categorical.Categorical(math_ops.log(histograms) - 50.)
self.assertAllEqual(dist.mode().eval(), [[1, 0]])
def testCategoricalCategoricalKL(self):
def np_softmax(logits):
exp_logits = np.exp(logits)
return exp_logits / exp_logits.sum(axis=-1, keepdims=True)
......@@ -233,18 +247,18 @@ class CategoricalTest(tf.test.TestCase):
a_logits = np.random.randn(batch_size, categories)
b_logits = np.random.randn(batch_size, categories)
a = tf.contrib.distributions.Categorical(logits=a_logits)
b = tf.contrib.distributions.Categorical(logits=b_logits)
a = categorical.Categorical(logits=a_logits)
b = categorical.Categorical(logits=b_logits)
kl = tf.contrib.distributions.kl(a, b)
kl = kullback_leibler.kl(a, b)
kl_val = sess.run(kl)
# Make sure KL(a||a) is 0
kl_same = sess.run(tf.contrib.distributions.kl(a, a))
kl_same = sess.run(kullback_leibler.kl(a, a))
prob_a = np_softmax(a_logits)
prob_b = np_softmax(b_logits)
kl_expected = np.sum(
prob_a * (np.log(prob_a) - np.log(prob_b)), axis=-1)
kl_expected = np.sum(prob_a * (np.log(prob_a) - np.log(prob_b)),
axis=-1)
self.assertEqual(kl.get_shape(), (batch_size,))
self.assertAllClose(kl_val, kl_expected)
......@@ -252,4 +266,4 @@ class CategoricalTest(tf.test.TestCase):
if __name__ == "__main__":
tf.test.main()
test.main()
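testCategoricalCategoricalKL builds the expected KL divergence by hand from softmaxed logits. scipy computes the same quantity directly, which gives an independent check of the formula; a standalone sketch, assuming scipy as the neighboring tests do:

  # sum(p_a * (log p_a - log p_b)) is exactly scipy's relative entropy.
  import numpy as np
  from scipy import stats

  logits_a = np.random.randn(4)
  logits_b = np.random.randn(4)

  def np_softmax(logits):
    exp_logits = np.exp(logits - logits.max())
    return exp_logits / exp_logits.sum()

  p_a, p_b = np_softmax(logits_a), np_softmax(logits_b)
  kl_manual = np.sum(p_a * (np.log(p_a) - np.log(p_b)))
  assert np.isclose(kl_manual, stats.entropy(p_a, p_b))  # KL(a || b)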
......@@ -20,18 +20,21 @@ from __future__ import print_function
import numpy as np
from scipy import stats
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import chi2 as chi2_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
class Chi2Test(tf.test.TestCase):
class Chi2Test(test.TestCase):
def testChi2LogPDF(self):
with self.test_session():
batch_size = 6
df = tf.constant([2.0] * batch_size, dtype=np.float64)
df = constant_op.constant([2.0] * batch_size, dtype=np.float64)
df_v = 2.0
x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float64)
chi2 = tf.contrib.distributions.Chi2(df=df)
chi2 = chi2_lib.Chi2(df=df)
expected_log_pdf = stats.chi2.logpdf(x, df_v)
log_pdf = chi2.log_pdf(x)
......@@ -45,11 +48,11 @@ class Chi2Test(tf.test.TestCase):
def testChi2CDF(self):
with self.test_session():
batch_size = 6
df = tf.constant([2.0] * batch_size, dtype=np.float64)
df = constant_op.constant([2.0] * batch_size, dtype=np.float64)
df_v = 2.0
x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float64)
chi2 = tf.contrib.distributions.Chi2(df=df)
chi2 = chi2_lib.Chi2(df=df)
expected_cdf = stats.chi2.cdf(x, df_v)
cdf = chi2.cdf(x)
......@@ -60,7 +63,7 @@ class Chi2Test(tf.test.TestCase):
with self.test_session():
df_v = np.array([1., 3, 5], dtype=np.float64)
expected_mean = stats.chi2.mean(df_v)
chi2 = tf.contrib.distributions.Chi2(df=df_v)
chi2 = chi2_lib.Chi2(df=df_v)
self.assertEqual(chi2.mean().get_shape(), (3,))
self.assertAllClose(chi2.mean().eval(), expected_mean)
......@@ -68,7 +71,7 @@ class Chi2Test(tf.test.TestCase):
with self.test_session():
df_v = np.array([1., 3, 5], np.float64)
expected_variances = stats.chi2.var(df_v)
chi2 = tf.contrib.distributions.Chi2(df=df_v)
chi2 = chi2_lib.Chi2(df=df_v)
self.assertEqual(chi2.variance().get_shape(), (3,))
self.assertAllClose(chi2.variance().eval(), expected_variances)
......@@ -76,16 +79,17 @@ class Chi2Test(tf.test.TestCase):
with self.test_session():
df_v = np.array([1., 3, 5], dtype=np.float64)
expected_entropy = stats.chi2.entropy(df_v)
chi2 = tf.contrib.distributions.Chi2(df=df_v)
chi2 = chi2_lib.Chi2(df=df_v)
self.assertEqual(chi2.entropy().get_shape(), (3,))
self.assertAllClose(chi2.entropy().eval(), expected_entropy)
def testChi2WithAbsDf(self):
with self.test_session():
df_v = np.array([-1.3, -3.2, 5], dtype=np.float64)
chi2 = tf.contrib.distributions.Chi2WithAbsDf(df=df_v)
self.assertAllClose(tf.floor(tf.abs(df_v)).eval(), chi2.df.eval())
chi2 = chi2_lib.Chi2WithAbsDf(df=df_v)
self.assertAllClose(
math_ops.floor(math_ops.abs(df_v)).eval(), chi2.df.eval())
if __name__ == "__main__":
tf.test.main()
test.main()
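The last hunk pins down the parameter transform behind Chi2WithAbsDf; in plain numpy it is just floor(abs(df)). A standalone illustration, not part of the diff:

  import numpy as np

  df_v = np.array([-1.3, -3.2, 5.])
  print(np.floor(np.abs(df_v)))  # [ 1.  3.  5.] -- the df values the test expects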
......@@ -17,12 +17,16 @@ from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from tensorflow.contrib import distributions
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.platform import test
ds = tf.contrib.distributions
ds = distributions
class DirichletMultinomialTest(tf.test.TestCase):
class DirichletMultinomialTest(test.TestCase):
def setUp(self):
self._rng = np.random.RandomState(42)
......@@ -33,8 +37,8 @@ class DirichletMultinomialTest(tf.test.TestCase):
dist = ds.DirichletMultinomial(1., alpha)
self.assertEqual(3, dist.event_shape().eval())
self.assertAllEqual([], dist.batch_shape().eval())
self.assertEqual(tf.TensorShape([3]), dist.get_event_shape())
self.assertEqual(tf.TensorShape([]), dist.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([3]), dist.get_event_shape())
self.assertEqual(tensor_shape.TensorShape([]), dist.get_batch_shape())
def testComplexShapes(self):
with self.test_session():
......@@ -43,8 +47,8 @@ class DirichletMultinomialTest(tf.test.TestCase):
dist = ds.DirichletMultinomial(n, alpha)
self.assertEqual(2, dist.event_shape().eval())
self.assertAllEqual([3, 2], dist.batch_shape().eval())
self.assertEqual(tf.TensorShape([2]), dist.get_event_shape())
self.assertEqual(tf.TensorShape([3, 2]), dist.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([2]), dist.get_event_shape())
self.assertEqual(tensor_shape.TensorShape([3, 2]), dist.get_batch_shape())
def testNproperty(self):
alpha = [[1., 2, 3]]
......@@ -65,8 +69,7 @@ class DirichletMultinomialTest(tf.test.TestCase):
alpha = [[1., 2, 3]]
n = [[5.]]
with self.test_session():
dist = ds.DirichletMultinomial(
n, alpha, validate_args=True)
dist = ds.DirichletMultinomial(n, alpha, validate_args=True)
dist.pmf([2., 3, 0]).eval()
dist.pmf([3., 0, 2]).eval()
with self.assertRaisesOpError("Condition x >= 0.*"):
......@@ -78,16 +81,14 @@ class DirichletMultinomialTest(tf.test.TestCase):
alpha = [[1., 2, 3]]
n = [[5.]]
with self.test_session():
dist = ds.DirichletMultinomial(
n, alpha, validate_args=True)
dist = ds.DirichletMultinomial(n, alpha, validate_args=True)
dist.pmf([2., 3, 0]).eval()
dist.pmf([3., 0, 2]).eval()
dist.pmf([3.0, 0, 2.0]).eval()
# Both equality and integer checking fail.
with self.assertRaisesOpError("Condition x == y.*"):
dist.pmf([1.0, 2.5, 1.5]).eval()
dist = ds.DirichletMultinomial(
n, alpha, validate_args=False)
dist = ds.DirichletMultinomial(n, alpha, validate_args=False)
dist.pmf([1., 2., 3.]).eval()
# Non-integer arguments work.
dist.pmf([1.0, 2.5, 1.5]).eval()
......@@ -155,8 +156,7 @@ class DirichletMultinomialTest(tf.test.TestCase):
with self.test_session():
alpha = [[1., 2], [2., 3]]
counts = [[1., 0]]
pmf = ds.DirichletMultinomial(
[1., 1.], alpha).pmf(counts)
pmf = ds.DirichletMultinomial([1., 1.], alpha).pmf(counts)
self.assertAllClose([1 / 3., 2 / 5.], pmf.eval())
self.assertEqual((2), pmf.get_shape())
......@@ -220,13 +220,15 @@ class DirichletMultinomialTest(tf.test.TestCase):
# Off diagonal entries are of the form:
# Cov(X_i, X_j) = -n * alpha_i * alpha_j / (alpha_sum ** 2) *
# (alpha_sum + n) / (alpha_sum + 1)
covariance_entry = lambda a, b, a_sum: -a * b/ a_sum**2
covariance_entry = lambda a, b, a_sum: -a * b / a_sum**2
# Shape [2, 2].
shared_matrix = np.array([
[variance_entry(alpha[0], alpha_0),
covariance_entry(alpha[0], alpha[1], alpha_0)],
[covariance_entry(alpha[1], alpha[0], alpha_0),
variance_entry(alpha[1], alpha_0)]])
shared_matrix = np.array([[
variance_entry(alpha[0], alpha_0),
covariance_entry(alpha[0], alpha[1], alpha_0)
], [
covariance_entry(alpha[1], alpha[0], alpha_0),
variance_entry(alpha[1], alpha_0)
]])
with self.test_session():
for n in ns:
......@@ -248,25 +250,30 @@ class DirichletMultinomialTest(tf.test.TestCase):
ns = np.array([[2.], [3.], [4.], [5.]], dtype=np.float32)
variance_entry = lambda a, a_sum: a / a_sum * (1 - a / a_sum)
covariance_entry = lambda a, b, a_sum: -a * b/ a_sum**2
covariance_entry = lambda a, b, a_sum: -a * b / a_sum**2
# Shape [4, 3, 3]
shared_matrix = np.array(4 * [[
[variance_entry(alpha_v[0], alpha_0),
covariance_entry(alpha_v[0], alpha_v[1], alpha_0),
covariance_entry(alpha_v[0], alpha_v[2], alpha_0)],
[covariance_entry(alpha_v[1], alpha_v[0], alpha_0),
variance_entry(alpha_v[1], alpha_0),
covariance_entry(alpha_v[1], alpha_v[2], alpha_0)],
[covariance_entry(alpha_v[2], alpha_v[0], alpha_0),
covariance_entry(alpha_v[2], alpha_v[1], alpha_0),
variance_entry(alpha_v[2], alpha_0)]]], dtype=np.float32)
shared_matrix = np.array(
4 * [[[
variance_entry(alpha_v[0], alpha_0),
covariance_entry(alpha_v[0], alpha_v[1], alpha_0),
covariance_entry(alpha_v[0], alpha_v[2], alpha_0)
], [
covariance_entry(alpha_v[1], alpha_v[0], alpha_0),
variance_entry(alpha_v[1], alpha_0),
covariance_entry(alpha_v[1], alpha_v[2], alpha_0)
], [
covariance_entry(alpha_v[2], alpha_v[0], alpha_0),
covariance_entry(alpha_v[2], alpha_v[1], alpha_0),
variance_entry(alpha_v[2], alpha_0)
]]],
dtype=np.float32)
with self.test_session():
# ns is shape [4, 1], and alpha is shape [4, 3].
dist = ds.DirichletMultinomial(ns, alpha)
variance = dist.variance()
expected_variance = np.expand_dims(
ns * (ns + alpha_0) / (1 + alpha_0), -1) * shared_matrix
expected_variance = np.expand_dims(ns * (ns + alpha_0) / (1 + alpha_0),
-1) * shared_matrix
self.assertEqual((4, 3, 3), variance.get_shape())
self.assertAllClose(expected_variance, variance.eval())
......@@ -360,8 +367,7 @@ class DirichletMultinomialTest(tf.test.TestCase):
alpha = [[-1., 2]] # alpha should be positive.
counts = [[1., 0], [0., -1]] # counts should be non-negative.
n = [-5.3] # n should be a non negative integer equal to counts.sum.
dist = ds.DirichletMultinomial(
n, alpha, validate_args=False)
dist = ds.DirichletMultinomial(n, alpha, validate_args=False)
dist.pmf(counts).eval() # Should not raise.
def testSampleUnbiasedNonScalarBatch(self):
......@@ -370,10 +376,11 @@ class DirichletMultinomialTest(tf.test.TestCase):
n=5., alpha=2. * self._rng.rand(4, 3, 2).astype(np.float32))
n = int(3e3)
x = dist.sample(n, seed=0)
sample_mean = tf.reduce_mean(x, 0)
sample_mean = math_ops.reduce_mean(x, 0)
# Cyclically rotate event dims left.
x_centered = tf.transpose(x - sample_mean, [1, 2, 3, 0])
sample_covariance = tf.matmul(x_centered, x_centered, adjoint_b=True) / n
x_centered = array_ops.transpose(x - sample_mean, [1, 2, 3, 0])
sample_covariance = math_ops.matmul(
x_centered, x_centered, adjoint_b=True) / n
[
sample_mean_,
sample_covariance_,
......@@ -386,11 +393,10 @@ class DirichletMultinomialTest(tf.test.TestCase):
dist.variance(),
])
self.assertAllEqual([4, 3, 2], sample_mean.get_shape())
self.assertAllClose(actual_mean_, sample_mean_,
atol=0., rtol=0.15)
self.assertAllClose(actual_mean_, sample_mean_, atol=0., rtol=0.15)
self.assertAllEqual([4, 3, 2, 2], sample_covariance.get_shape())
self.assertAllClose(actual_covariance_, sample_covariance_,
atol=0., rtol=0.20)
self.assertAllClose(
actual_covariance_, sample_covariance_, atol=0., rtol=0.20)
def testSampleUnbiasedScalarBatch(self):
with self.test_session() as sess:
......@@ -398,9 +404,10 @@ class DirichletMultinomialTest(tf.test.TestCase):
n=5., alpha=2. * self._rng.rand(4).astype(np.float32))
n = int(5e3)
x = dist.sample(n, seed=0)
sample_mean = tf.reduce_mean(x, 0)
sample_mean = math_ops.reduce_mean(x, 0)
x_centered = x - sample_mean # Already transposed to [n, 2].
sample_covariance = tf.matmul(x_centered, x_centered, adjoint_a=True) / n
sample_covariance = math_ops.matmul(
x_centered, x_centered, adjoint_a=True) / n
[
sample_mean_,
sample_covariance_,
......@@ -413,12 +420,11 @@ class DirichletMultinomialTest(tf.test.TestCase):
dist.variance(),
])
self.assertAllEqual([4], sample_mean.get_shape())
self.assertAllClose(actual_mean_, sample_mean_,
atol=0., rtol=0.05)
self.assertAllClose(actual_mean_, sample_mean_, atol=0., rtol=0.05)
self.assertAllEqual([4, 4], sample_covariance.get_shape())
self.assertAllClose(actual_covariance_, sample_covariance_,
atol=0., rtol=0.15)
self.assertAllClose(
actual_covariance_, sample_covariance_, atol=0., rtol=0.15)
if __name__ == "__main__":
tf.test.main()
test.main()
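The variance hunks above hand-build the Dirichlet-multinomial covariance matrix as a scalar prefactor times a shared matrix. Written out, the moments that construction implements are, with alpha_0 the sum of the alphas:

\[
  \operatorname{Var}(X_i) = n\,\frac{\alpha_i}{\alpha_0}
    \Bigl(1 - \frac{\alpha_i}{\alpha_0}\Bigr)\,
    \frac{n + \alpha_0}{1 + \alpha_0},
  \qquad
  \operatorname{Cov}(X_i, X_j) = -\,n\,\frac{\alpha_i \alpha_j}{\alpha_0^{2}}\,
    \frac{n + \alpha_0}{1 + \alpha_0} \quad (i \neq j),
\]

which matches variance_entry, covariance_entry, and the n * (n + alpha_0) / (1 + alpha_0) factor applied to shared_matrix.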
......@@ -18,40 +18,43 @@ from __future__ import print_function
import numpy as np
from scipy import stats
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import dirichlet as dirichlet_lib
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import tensor_shape
from tensorflow.python.platform import test
class DirichletTest(tf.test.TestCase):
class DirichletTest(test.TestCase):
def testSimpleShapes(self):
with self.test_session():
alpha = np.random.rand(3)
dist = tf.contrib.distributions.Dirichlet(alpha)
dist = dirichlet_lib.Dirichlet(alpha)
self.assertEqual(3, dist.event_shape().eval())
self.assertAllEqual([], dist.batch_shape().eval())
self.assertEqual(tf.TensorShape([3]), dist.get_event_shape())
self.assertEqual(tf.TensorShape([]), dist.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([3]), dist.get_event_shape())
self.assertEqual(tensor_shape.TensorShape([]), dist.get_batch_shape())
def testComplexShapes(self):
with self.test_session():
alpha = np.random.rand(3, 2, 2)
dist = tf.contrib.distributions.Dirichlet(alpha)
dist = dirichlet_lib.Dirichlet(alpha)
self.assertEqual(2, dist.event_shape().eval())
self.assertAllEqual([3, 2], dist.batch_shape().eval())
self.assertEqual(tf.TensorShape([2]), dist.get_event_shape())
self.assertEqual(tf.TensorShape([3, 2]), dist.get_batch_shape())
self.assertEqual(tensor_shape.TensorShape([2]), dist.get_event_shape())
self.assertEqual(tensor_shape.TensorShape([3, 2]), dist.get_batch_shape())
def testAlphaProperty(self):
alpha = [[1., 2, 3]]
with self.test_session():
dist = tf.contrib.distributions.Dirichlet(alpha)
dist = dirichlet_lib.Dirichlet(alpha)
self.assertEqual([1, 3], dist.alpha.get_shape())
self.assertAllClose(alpha, dist.alpha.eval())
def testPdfXProper(self):
alpha = [[1., 2, 3]]
with self.test_session():
dist = tf.contrib.distributions.Dirichlet(alpha, validate_args=True)
dist = dirichlet_lib.Dirichlet(alpha, validate_args=True)
dist.pdf([.1, .3, .6]).eval()
dist.pdf([.2, .3, .5]).eval()
# Either condition can trigger.
......@@ -66,7 +69,7 @@ class DirichletTest(tf.test.TestCase):
with self.test_session():
alpha = [1., 2]
x = [.5, .5]
dist = tf.contrib.distributions.Dirichlet(alpha)
dist = dirichlet_lib.Dirichlet(alpha)
pdf = dist.pdf(x)
self.assertAllClose(1., pdf.eval())
self.assertEqual((), pdf.get_shape())
......@@ -75,9 +78,9 @@ class DirichletTest(tf.test.TestCase):
with self.test_session():
alpha = [1., 2]
x = [.3, .7]
dist = tf.contrib.distributions.Dirichlet(alpha)
dist = dirichlet_lib.Dirichlet(alpha)
pdf = dist.pdf(x)
self.assertAllClose(7./5, pdf.eval())
self.assertAllClose(7. / 5, pdf.eval())
self.assertEqual((), pdf.get_shape())
def testPdfUniformZeroBatches(self):
......@@ -85,7 +88,7 @@ class DirichletTest(tf.test.TestCase):
# Corresponds to a uniform distribution
alpha = [1., 1, 1]
x = [[.2, .5, .3], [.3, .4, .3]]
dist = tf.contrib.distributions.Dirichlet(alpha)
dist = dirichlet_lib.Dirichlet(alpha)
pdf = dist.pdf(x)
self.assertAllClose([2., 2.], pdf.eval())
self.assertEqual((2), pdf.get_shape())
......@@ -94,40 +97,40 @@ class DirichletTest(tf.test.TestCase):
with self.test_session():
alpha = [[1., 2]]
x = [[.5, .5], [.3, .7]]
dist = tf.contrib.distributions.Dirichlet(alpha)
dist = dirichlet_lib.Dirichlet(alpha)
pdf = dist.pdf(x)
self.assertAllClose([1., 7./5], pdf.eval())
self.assertAllClose([1., 7. / 5], pdf.eval())
self.assertEqual((2), pdf.get_shape())
def testPdfAlphaStretchedInBroadcastWhenLowerRank(self):
with self.test_session():
alpha = [1., 2]
x = [[.5, .5], [.2, .8]]
pdf = tf.contrib.distributions.Dirichlet(alpha).pdf(x)
self.assertAllClose([1., 8./5], pdf.eval())
pdf = dirichlet_lib.Dirichlet(alpha).pdf(x)
self.assertAllClose([1., 8. / 5], pdf.eval())
self.assertEqual((2), pdf.get_shape())
def testPdfXStretchedInBroadcastWhenSameRank(self):
with self.test_session():
alpha = [[1., 2], [2., 3]]
x = [[.5, .5]]
pdf = tf.contrib.distributions.Dirichlet(alpha).pdf(x)
self.assertAllClose([1., 3./2], pdf.eval())
pdf = dirichlet_lib.Dirichlet(alpha).pdf(x)
self.assertAllClose([1., 3. / 2], pdf.eval())
self.assertEqual((2), pdf.get_shape())
def testPdfXStretchedInBroadcastWhenLowerRank(self):
with self.test_session():
alpha = [[1., 2], [2., 3]]
x = [.5, .5]
pdf = tf.contrib.distributions.Dirichlet(alpha).pdf(x)
self.assertAllClose([1., 3./2], pdf.eval())
pdf = dirichlet_lib.Dirichlet(alpha).pdf(x)
self.assertAllClose([1., 3. / 2], pdf.eval())
self.assertEqual((2), pdf.get_shape())
def testDirichletMean(self):
with self.test_session():
alpha = [1., 2, 3]
expected_mean = stats.dirichlet.mean(alpha)
dirichlet = tf.contrib.distributions.Dirichlet(alpha=alpha)
dirichlet = dirichlet_lib.Dirichlet(alpha=alpha)
self.assertEqual(dirichlet.mean().get_shape(), (3,))
self.assertAllClose(dirichlet.mean().eval(), expected_mean)
......@@ -136,34 +139,32 @@ class DirichletTest(tf.test.TestCase):
alpha = [1., 2, 3]
denominator = np.sum(alpha)**2 * (np.sum(alpha) + 1)
expected_variance = np.diag(stats.dirichlet.var(alpha))
expected_variance += [
[0., -2, -3], [-2, 0, -6], [-3, -6, 0]] / denominator
dirichlet = tf.contrib.distributions.Dirichlet(alpha=alpha)
expected_variance += [[0., -2, -3], [-2, 0, -6],
[-3, -6, 0]] / denominator
dirichlet = dirichlet_lib.Dirichlet(alpha=alpha)
self.assertEqual(dirichlet.variance().get_shape(), (3, 3))
self.assertAllClose(dirichlet.variance().eval(), expected_variance)
def testDirichletMode(self):
with self.test_session():
alpha = np.array([1.1, 2, 3])
expected_mode = (alpha - 1)/(np.sum(alpha) - 3)
dirichlet = tf.contrib.distributions.Dirichlet(alpha=alpha)
expected_mode = (alpha - 1) / (np.sum(alpha) - 3)
dirichlet = dirichlet_lib.Dirichlet(alpha=alpha)
self.assertEqual(dirichlet.mode().get_shape(), (3,))
self.assertAllClose(dirichlet.mode().eval(), expected_mode)
def testDirichletModeInvalid(self):
with self.test_session():
alpha = np.array([1., 2, 3])
dirichlet = tf.contrib.distributions.Dirichlet(
alpha=alpha, allow_nan_stats=False)
dirichlet = dirichlet_lib.Dirichlet(alpha=alpha, allow_nan_stats=False)
with self.assertRaisesOpError("Condition x < y.*"):
dirichlet.mode().eval()
def testDirichletModeEnableAllowNanStats(self):
with self.test_session():
alpha = np.array([1., 2, 3])
dirichlet = tf.contrib.distributions.Dirichlet(
alpha=alpha, allow_nan_stats=True)
expected_mode = (alpha - 1)/(np.sum(alpha) - 3)
dirichlet = dirichlet_lib.Dirichlet(alpha=alpha, allow_nan_stats=True)
expected_mode = (alpha - 1) / (np.sum(alpha) - 3)
expected_mode[0] = np.nan
self.assertEqual(dirichlet.mode().get_shape(), (3,))
......@@ -173,15 +174,15 @@ class DirichletTest(tf.test.TestCase):
with self.test_session():
alpha = [1., 2, 3]
expected_entropy = stats.dirichlet.entropy(alpha)
dirichlet = tf.contrib.distributions.Dirichlet(alpha=alpha)
dirichlet = dirichlet_lib.Dirichlet(alpha=alpha)
self.assertEqual(dirichlet.entropy().get_shape(), ())
self.assertAllClose(dirichlet.entropy().eval(), expected_entropy)
def testDirichletSample(self):
with self.test_session():
alpha = [1., 2]
dirichlet = tf.contrib.distributions.Dirichlet(alpha)
n = tf.constant(100000)
dirichlet = dirichlet_lib.Dirichlet(alpha)
n = constant_op.constant(100000)
samples = dirichlet.sample(n)
sample_values = samples.eval()
self.assertEqual(sample_values.shape, (100000, 2))
......@@ -189,8 +190,11 @@ class DirichletTest(tf.test.TestCase):
self.assertLess(
stats.kstest(
# Beta is a univariate distribution.
sample_values[:, 0], stats.beta(a=1., b=2.).cdf)[0],
sample_values[:, 0],
stats.beta(
a=1., b=2.).cdf)[0],
0.01)
if __name__ == "__main__":
tf.test.main()
test.main()
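The mode tests above rely on the closed form for the Dirichlet mode, which is defined only when every alpha_i exceeds 1 (K is the number of categories, 3 in these tests):

\[
  \text{mode}_i = \frac{\alpha_i - 1}{\alpha_0 - K},
  \qquad \alpha_0 = \sum_{k=1}^{K} \alpha_k, \quad \alpha_i > 1.
\]

When some alpha_i is at most 1, testDirichletModeInvalid expects an op error, and testDirichletModeEnableAllowNanStats expects NaN in that entry instead.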
......@@ -16,39 +16,39 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from tensorflow.contrib import distributions
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import random_ops
from tensorflow.python.platform import test
dists = tf.contrib.distributions
dists = distributions
class DistributionTest(tf.test.TestCase):
class DistributionTest(test.TestCase):
def testParamShapesAndFromParams(self):
classes = [
dists.Normal,
dists.Bernoulli,
dists.Beta,
dists.Chi2,
dists.Exponential,
dists.Gamma,
dists.InverseGamma,
dists.Laplace,
dists.StudentT,
dists.Uniform]
dists.Normal, dists.Bernoulli, dists.Beta, dists.Chi2,
dists.Exponential, dists.Gamma, dists.InverseGamma, dists.Laplace,
dists.StudentT, dists.Uniform
]
sample_shapes = [(), (10,), (10, 20, 30)]
with self.test_session():
for cls in classes:
for sample_shape in sample_shapes:
param_shapes = cls.param_shapes(sample_shape)
params = dict([(name, tf.random_normal(shape))
params = dict([(name, random_ops.random_normal(shape))
for name, shape in param_shapes.items()])
dist = cls(**params)
self.assertAllEqual(sample_shape, tf.shape(dist.sample()).eval())
self.assertAllEqual(sample_shape,
array_ops.shape(dist.sample()).eval())
dist_copy = dist.copy()
self.assertAllEqual(sample_shape,
tf.shape(dist_copy.sample()).eval())
array_ops.shape(dist_copy.sample()).eval())
self.assertEqual(dist.parameters, dist_copy.parameters)
def testCopyExtraArgs(self):
......@@ -57,8 +57,8 @@ class DistributionTest(tf.test.TestCase):
# different initialization arguments. We therefore spot test a few.
normal = dists.Normal(mu=1., sigma=2., validate_args=True)
self.assertEqual(normal.parameters, normal.copy().parameters)
wishart = dists.WishartFull(df=2, scale=[[1., 2], [2, 5]],
validate_args=True)
wishart = dists.WishartFull(
df=2, scale=[[1., 2], [2, 5]], validate_args=True)
self.assertEqual(wishart.parameters, wishart.copy().parameters)
def testCopyOverride(self):
......@@ -67,8 +67,8 @@ class DistributionTest(tf.test.TestCase):
normal_copy = normal.copy(validate_args=False)
base_params = normal.parameters.copy()
copy_params = normal.copy(validate_args=False).parameters.copy()
self.assertNotEqual(base_params.pop("validate_args"),
copy_params.pop("validate_args"))
self.assertNotEqual(
base_params.pop("validate_args"), copy_params.pop("validate_args"))
self.assertEqual(base_params, copy_params)
def testIsScalar(self):
......@@ -76,23 +76,19 @@ class DistributionTest(tf.test.TestCase):
mu = 1.
sigma = 2.
normal = dists.Normal(mu, sigma,
validate_args=True)
normal = dists.Normal(mu, sigma, validate_args=True)
self.assertTrue(tensor_util.constant_value(normal.is_scalar_event))
self.assertTrue(tensor_util.constant_value(normal.is_scalar_batch))
normal = dists.Normal([mu], [sigma],
validate_args=True)
normal = dists.Normal([mu], [sigma], validate_args=True)
self.assertTrue(tensor_util.constant_value(normal.is_scalar_event))
self.assertFalse(tensor_util.constant_value(normal.is_scalar_batch))
mvn = dists.MultivariateNormalDiag([mu], [sigma],
validate_args=True)
mvn = dists.MultivariateNormalDiag([mu], [sigma], validate_args=True)
self.assertFalse(tensor_util.constant_value(mvn.is_scalar_event))
self.assertTrue(tensor_util.constant_value(mvn.is_scalar_batch))
mvn = dists.MultivariateNormalDiag([[mu]], [[sigma]],
validate_args=True)
mvn = dists.MultivariateNormalDiag([[mu]], [[sigma]], validate_args=True)
self.assertFalse(tensor_util.constant_value(mvn.is_scalar_event))
self.assertFalse(tensor_util.constant_value(mvn.is_scalar_batch))
......@@ -100,24 +96,27 @@ class DistributionTest(tf.test.TestCase):
# function.
# Test case 1, 2.
x = tf.placeholder(dtype=tf.int32, shape=[])
x = array_ops.placeholder(dtype=dtypes.int32, shape=[])
# None would fire an exception were it actually executed.
self.assertTrue(normal._is_scalar_helper(x.get_shape, lambda: None))
self.assertTrue(normal._is_scalar_helper(lambda: tf.TensorShape(None),
lambda: tf.shape(x)))
self.assertTrue(
normal._is_scalar_helper(lambda: tensor_shape.TensorShape(None),
lambda: array_ops.shape(x)))
x = tf.placeholder(dtype=tf.int32, shape=[1])
x = array_ops.placeholder(dtype=dtypes.int32, shape=[1])
# None would fire an exception were it actually executed.
self.assertFalse(normal._is_scalar_helper(x.get_shape, lambda: None))
self.assertFalse(normal._is_scalar_helper(lambda: tf.TensorShape(None),
lambda: tf.shape(x)))
self.assertFalse(
normal._is_scalar_helper(lambda: tensor_shape.TensorShape(None),
lambda: array_ops.shape(x)))
# Test case 3.
x = tf.placeholder(dtype=tf.int32)
is_scalar = normal._is_scalar_helper(x.get_shape, lambda: tf.shape(x))
x = array_ops.placeholder(dtype=dtypes.int32)
is_scalar = normal._is_scalar_helper(x.get_shape,
lambda: array_ops.shape(x))
self.assertTrue(is_scalar.eval(feed_dict={x: 1}))
self.assertFalse(is_scalar.eval(feed_dict={x: [1]}))
if __name__ == '__main__':
tf.test.main()
if __name__ == "__main__":
test.main()
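testParamShapesAndFromParams drives the class-level param_shapes API: given a desired sample shape, the class reports the shape each parameter must have. A minimal sketch of the call pattern the test exercises (parameter names follow the mu/sigma convention visible in this file; treat the details as assumptions):

  from tensorflow.contrib import distributions as dists
  from tensorflow.python.ops import random_ops

  sample_shape = (10, 20, 30)
  # Maps each parameter name (e.g. "mu", "sigma") to the required shape.
  param_shapes = dists.Normal.param_shapes(sample_shape)
  params = {name: random_ops.random_normal(shape)
            for name, shape in param_shapes.items()}
  dist = dists.Normal(**params)
  # dist.sample() now yields tensors of shape sample_shape.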
......@@ -20,10 +20,11 @@ from __future__ import print_function
import abc
import numpy as np
import six
import tensorflow as tf
from tensorflow.contrib.distributions.python.ops import operator_pd_diag
from tensorflow.contrib.distributions.python.ops import operator_test_util
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import test
@six.add_metaclass(abc.ABCMeta)
......@@ -75,24 +76,24 @@ class OperatorPDDiagBaseTest(object):
operator.to_dense().eval() # Should not raise
class OperatorPDDiagTest(
OperatorPDDiagBaseTest, operator_test_util.OperatorPDDerivedClassTest):
class OperatorPDDiagTest(OperatorPDDiagBaseTest,
operator_test_util.OperatorPDDerivedClassTest):
"""Most tests done in the base classes."""
def _diag_to_matrix(self, diag):
return tf.matrix_diag(diag).eval()
return array_ops.matrix_diag(diag).eval()
@property
def operator_class(self):
return operator_pd_diag.OperatorPDDiag
class OperatorPDSqrtDiagTest(
OperatorPDDiagBaseTest, operator_test_util.OperatorPDDerivedClassTest):
class OperatorPDSqrtDiagTest(OperatorPDDiagBaseTest,
operator_test_util.OperatorPDDerivedClassTest):
"""Most tests done in the base classes."""
def _diag_to_matrix(self, diag):
return tf.matrix_diag(diag**2).eval()
return array_ops.matrix_diag(diag**2).eval()
@property
def operator_class(self):
......@@ -100,4 +101,4 @@ class OperatorPDSqrtDiagTest(
if __name__ == "__main__":
tf.test.main()
test.main()
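The two _diag_to_matrix helpers encode how each operator class interprets its diag argument: OperatorPDDiag treats it as the matrix diagonal itself, while OperatorPDSqrtDiag treats it as the diagonal of the matrix square root. A standalone numpy identity check (not part of the diff):

  import numpy as np

  d = np.array([1., 2., 3.])
  # diag(d) @ diag(d) == diag(d**2), which is why the sqrt variant
  # converts its diag to a dense matrix via matrix_diag(diag**2).
  assert np.allclose(np.diag(d).dot(np.diag(d)), np.diag(d ** 2))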
(The diffs for 32 more changed files are collapsed and not shown.)