Automated rollback of change 144338393

Change: 144354566

Automated rollback of change 144338393
Change: 144354566
7f0cd9c7 · Gunhan Gulsoy · TensorFlower Gardener · 747bbc69 · 7f0cd9c7 · 747bbc69
4 changed files
--- a/tensorflow/contrib/distributions/BUILD
+++ b/tensorflow/contrib/distributions/BUILD
@@ -448,24 +448,6 @@ cuda_py_tests(
    tags = ["nomsan"],  # disable to avoid false positives from scipy.
 )

-cuda_py_tests(
-    name = "vector_student_t_test",
-    size = "small",
-    srcs = ["python/kernel_tests/vector_student_t_test.py"],
-    additional_deps = [
-        ":distributions_py",
-        ":distributions_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO",
-        "//third_party/py/numpy",
-        "//third_party/py/scipy",
-        "//tensorflow/python:array_ops",
-        "//tensorflow/python:client_testlib",
-        "//tensorflow/python:framework",
-        "//tensorflow/python:framework_for_generated_wrappers",
-        "//tensorflow/python:framework_test_lib",
-        "//tensorflow/python:platform_test",
-    ],
-)
-
 cuda_py_tests(
    name = "uniform_test",
    size = "small",

--- a/tensorflow/contrib/distributions/python/kernel_tests/vector_student_t_test.py
+++ b/tensorflow/contrib/distributions/python/kernel_tests/vector_student_t_test.py
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for MultivariateStudentsT Distribution."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from scipy import linalg
-from scipy import special
-
-from tensorflow.contrib.distributions.python.ops.vector_student_t import _VectorStudentT
-from tensorflow.python.framework import dtypes
-from tensorflow.python.ops import array_ops
-from tensorflow.python.platform import test
-
-
-class _FakeVectorStudentT(object):
-  """Fake scipy implementation for Multivariate Student's t-distribution.
-
-  Technically we don't need to test the `Vector Student's t-distribution` since
-  its composed of only unit-tested parts.  However this _FakeVectorStudentT
-  serves as something like an end-to-end test of the
-  `TransformedDistribution + Affine` API.
-
-  Other `Vector*` implementations need only test new code. That we don't need
-  to test every Vector* distribution is good because there aren't SciPy
-  analogues and reimplementing everything in NumPy sort of defeats the point of
-  having the `TransformedDistribution + Affine` API.
-  """
-
-  def __init__(self, df, shift, scale_tril):
-    self._df = np.asarray(df)
-    self._shift = np.asarray(shift)
-    self._scale_tril = np.asarray(scale_tril)
-
-  def log_prob(self, x):
-    def _compute(df, shift, scale_tril, x):
-      k = scale_tril.shape[-1]
-      ildj = np.sum(np.log(np.abs(np.diag(scale_tril))), axis=-1)
-      logz = ildj + k * (0.5 * np.log(df) +
-                         0.5 * np.log(np.pi) +
-                         special.gammaln(0.5 * df) -
-                         special.gammaln(0.5 * (df + 1.)))
-      y = linalg.solve_triangular(scale_tril, np.matrix(x - shift).T,
-                                  lower=True, overwrite_b=True)
-      logs = -0.5 * (df + 1.) * np.sum(np.log1p(y**2. / df), axis=-2)
-      return logs - logz
-    if not self._df.shape:
-      return _compute(self._df, self._shift, self._scale_tril, x)
-    return np.concatenate([
-        [_compute(self._df[i], self._shift[i], self._scale_tril[i], x[:, i, :])]
-        for i in range(len(self._df))]).T
-
-  def prob(self, x):
-    return np.exp(self.log_prob(x))
-
-
-class VectorStudentTTest(test.TestCase):
-
-  def setUp(self):
-    self._rng = np.random.RandomState(42)
-
-  def testProbStaticScalar(self):
-    with self.test_session():
-      # Scalar batch_shape.
-      df = np.asarray(3., dtype=np.float32)
-      # Scalar batch_shape.
-      shift = np.asarray([1], dtype=np.float32)
-      scale_diag = np.asarray([2.], dtype=np.float32)
-      scale_tril = np.diag(scale_diag)
-
-      expected_mst = _FakeVectorStudentT(
-          df=df, shift=shift, scale_tril=scale_tril)
-
-      actual_mst = _VectorStudentT(df=df, shift=shift, scale_diag=scale_diag,
-                                   validate_args=True)
-      x = 2. * self._rng.rand(4, 1).astype(np.float32) - 1.
-
-      self.assertAllClose(expected_mst.log_prob(x),
-                          actual_mst.log_prob(x).eval(),
-                          rtol=0., atol=1e-5)
-      self.assertAllClose(expected_mst.prob(x),
-                          actual_mst.prob(x).eval(),
-                          rtol=0., atol=1e-5)
-
-  def testProbStatic(self):
-    # Non-scalar batch_shape.
-    df = np.asarray([1., 2, 3], dtype=np.float32)
-    # Non-scalar batch_shape.
-    shift = np.asarray([[0., 0, 0],
-                        [1, 2, 3],
-                        [1, 0, 1]],
-                       dtype=np.float32)
-    scale_diag = np.asarray([[1., 2, 3],
-                             [2, 3, 4],
-                             [4, 5, 6]],
-                            dtype=np.float32)
-    scale_tril = np.concatenate([[np.diag(scale_diag[i])]
-                                 for i in range(len(scale_diag))])
-    x = 2. * self._rng.rand(4, 3, 3).astype(np.float32) - 1.
-
-    expected_mst = _FakeVectorStudentT(
-        df=df, shift=shift, scale_tril=scale_tril)
-
-    with self.test_session():
-      actual_mst = _VectorStudentT(df=df, shift=shift, scale_diag=scale_diag,
-                                   validate_args=True)
-      self.assertAllClose(expected_mst.log_prob(x),
-                          actual_mst.log_prob(x).eval(),
-                          rtol=0., atol=1e-5)
-      self.assertAllClose(expected_mst.prob(x),
-                          actual_mst.prob(x).eval(),
-                          rtol=0., atol=1e-5)
-
-  def testProbDynamic(self):
-    # Non-scalar batch_shape.
-    df = np.asarray([1., 2, 3], dtype=np.float32)
-    # Non-scalar batch_shape.
-    shift = np.asarray([[0., 0, 0],
-                        [1, 2, 3],
-                        [1, 0, 1]],
-                       dtype=np.float32)
-    scale_diag = np.asarray([[1., 2, 3],
-                             [2, 3, 4],
-                             [4, 5, 6]],
-                            dtype=np.float32)
-    scale_tril = np.concatenate([[np.diag(scale_diag[i])]
-                                 for i in range(len(scale_diag))])
-    x = 2. * self._rng.rand(4, 3, 3).astype(np.float32) - 1.
-
-    expected_mst = _FakeVectorStudentT(
-        df=df, shift=shift, scale_tril=scale_tril)
-
-    with self.test_session():
-      df_pl = array_ops.placeholder(dtypes.float32, name="df")
-      shift_pl = array_ops.placeholder(dtypes.float32, name="shift")
-      scale_diag_pl = array_ops.placeholder(dtypes.float32, name="scale_diag")
-      feed_dict = {df_pl: df, shift_pl: shift, scale_diag_pl: scale_diag}
-      actual_mst = _VectorStudentT(df=df, shift=shift, scale_diag=scale_diag,
-                                   validate_args=True)
-      self.assertAllClose(expected_mst.log_prob(x),
-                          actual_mst.log_prob(x).eval(feed_dict=feed_dict),
-                          rtol=0., atol=1e-5)
-      self.assertAllClose(expected_mst.prob(x),
-                          actual_mst.prob(x).eval(feed_dict=feed_dict),
-                          rtol=0., atol=1e-5)
-
-  def testProbScalarBaseDistributionNonScalarTransform(self):
-    # Scalar batch_shape.
-    df = np.asarray(2., dtype=np.float32)
-    # Non-scalar batch_shape.
-    shift = np.asarray([[0., 0, 0],
-                        [1, 2, 3],
-                        [1, 0, 1]],
-                       dtype=np.float32)
-    scale_diag = np.asarray([[1., 2, 3],
-                             [2, 3, 4],
-                             [4, 5, 6]],
-                            dtype=np.float32)
-    scale_tril = np.concatenate([[np.diag(scale_diag[i])]
-                                 for i in range(len(scale_diag))])
-    x = 2. * self._rng.rand(4, 3, 3).astype(np.float32) - 1.
-
-    expected_mst = _FakeVectorStudentT(
-        df=np.tile(df, len(scale_diag)),
-        shift=shift,
-        scale_tril=scale_tril)
-
-    with self.test_session():
-      actual_mst = _VectorStudentT(df=df, shift=shift, scale_diag=scale_diag,
-                                   validate_args=True)
-      self.assertAllClose(expected_mst.log_prob(x),
-                          actual_mst.log_prob(x).eval(),
-                          rtol=0., atol=1e-5)
-      self.assertAllClose(expected_mst.prob(x),
-                          actual_mst.prob(x).eval(),
-                          rtol=0., atol=1e-5)
-
-  def testProbScalarBaseDistributionNonScalarTransformDynamic(self):
-    # Scalar batch_shape.
-    df = np.asarray(2., dtype=np.float32)
-    # Non-scalar batch_shape.
-    shift = np.asarray([[0., 0, 0],
-                        [1, 2, 3],
-                        [1, 0, 1]],
-                       dtype=np.float32)
-    scale_diag = np.asarray([[1., 2, 3],
-                             [2, 3, 4],
-                             [4, 5, 6]],
-                            dtype=np.float32)
-    scale_tril = np.concatenate([[np.diag(scale_diag[i])]
-                                 for i in range(len(scale_diag))])
-    x = 2. * self._rng.rand(4, 3, 3).astype(np.float32) - 1.
-
-    expected_mst = _FakeVectorStudentT(
-        df=np.tile(df, len(scale_diag)),
-        shift=shift,
-        scale_tril=scale_tril)
-
-    with self.test_session():
-      df_pl = array_ops.placeholder(dtypes.float32, name="df")
-      shift_pl = array_ops.placeholder(dtypes.float32, name="shift")
-      scale_diag_pl = array_ops.placeholder(dtypes.float32, name="scale_diag")
-      feed_dict = {df_pl: df, shift_pl: shift, scale_diag_pl: scale_diag}
-      actual_mst = _VectorStudentT(df=df, shift=shift, scale_diag=scale_diag,
-                                   validate_args=True)
-      self.assertAllClose(expected_mst.log_prob(x),
-                          actual_mst.log_prob(x).eval(feed_dict=feed_dict),
-                          rtol=0., atol=1e-5)
-      self.assertAllClose(expected_mst.prob(x),
-                          actual_mst.prob(x).eval(feed_dict=feed_dict),
-                          rtol=0., atol=1e-5)
-
-  def testProbNonScalarBaseDistributionScalarTransform(self):
-    # Non-scalar batch_shape.
-    df = np.asarray([1., 2., 3.], dtype=np.float32)
-    # Scalar batch_shape.
-    shift = np.asarray([1, 2, 3], dtype=np.float32)
-    scale_diag = np.asarray([2, 3, 4], dtype=np.float32)
-    scale_tril = np.diag(scale_diag)
-    x = 2. * self._rng.rand(4, 3, 3).astype(np.float32) - 1.
-
-    expected_mst = _FakeVectorStudentT(
-        df=df,
-        shift=np.tile(shift[None, :], [len(df), 1]),
-        scale_tril=np.tile(scale_tril[None, :, :], [len(df), 1, 1]))
-
-    with self.test_session():
-      actual_mst = _VectorStudentT(df=df, shift=shift, scale_diag=scale_diag,
-                                   validate_args=True)
-      self.assertAllClose(expected_mst.log_prob(x),
-                          actual_mst.log_prob(x).eval(),
-                          rtol=0., atol=1e-5)
-      self.assertAllClose(expected_mst.prob(x),
-                          actual_mst.prob(x).eval(),
-                          rtol=0., atol=1e-5)
-
-  def testProbNonScalarBaseDistributionScalarTransformDynamic(self):
-    # Non-scalar batch_shape.
-    df = np.asarray([1., 2., 3.], dtype=np.float32)
-    # Scalar batch_shape.
-    shift = np.asarray([1, 2, 3], dtype=np.float32)
-    scale_diag = np.asarray([2, 3, 4], dtype=np.float32)
-    scale_tril = np.diag(scale_diag)
-
-    x = 2. * self._rng.rand(4, 3, 3).astype(np.float32) - 1.
-
-    expected_mst = _FakeVectorStudentT(
-        df=df,
-        shift=np.tile(shift[None, :], [len(df), 1]),
-        scale_tril=np.tile(scale_tril[None, :, :], [len(df), 1, 1]))
-
-    with self.test_session():
-      df_pl = array_ops.placeholder(dtypes.float32, name="df")
-      shift_pl = array_ops.placeholder(dtypes.float32, name="shift")
-      scale_diag_pl = array_ops.placeholder(dtypes.float32, name="scale_diag")
-      feed_dict = {df_pl: df, shift_pl: shift, scale_diag_pl: scale_diag}
-      actual_mst = _VectorStudentT(df=df, shift=shift, scale_diag=scale_diag,
-                                   validate_args=True)
-      self.assertAllClose(expected_mst.log_prob(x),
-                          actual_mst.log_prob(x).eval(feed_dict=feed_dict),
-                          rtol=0., atol=1e-5)
-      self.assertAllClose(expected_mst.prob(x),
-                          actual_mst.prob(x).eval(feed_dict=feed_dict),
-                          rtol=0., atol=1e-5)
-
-
-if __name__ == "__main__":
-  test.main()
--- a/tensorflow/contrib/distributions/python/ops/bijector.py
+++ b/tensorflow/contrib/distributions/python/ops/bijector.py
@@ -1590,7 +1590,6 @@ class Affine(Bijector):
        `scale_diag` has shape [N1, N2, ... k, k], which represents a k x k
        lower triangular matrix.
        When `None` no `scale_tril` term is added to `scale`.
-        The upper triangular elements above the diagonal are ignored.
      scale_perturb_factor: Numeric `Tensor` representing factor matrix with
        last two dimensions of shape `(k, r)`.
        When `None`, no rank-r update is added to `scale`.

--- a/tensorflow/contrib/distributions/python/ops/vector_student_t.py
+++ b/tensorflow/contrib/distributions/python/ops/vector_student_t.py
-# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Vector Student's t distribution classes."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow.contrib.distributions.python.ops import bijector as bijectors
-from tensorflow.contrib.distributions.python.ops import distribution_util
-from tensorflow.contrib.distributions.python.ops import student_t
-from tensorflow.contrib.distributions.python.ops import transformed_distribution
-from tensorflow.python.framework import constant_op
-from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_shape
-from tensorflow.python.ops import array_ops
-
-
-# TODO(jvdillon): Add unittests for this once we know where will put this code
-# and how it will generally be used. In the interim this code is tested via the
-# _VectorStudentT tests.
-def _infer_shapes(scale_oppd, shift):
-  """Helper which returns batch_shape, event_shape from `Affine` properties.
-
-  The `Affine` `Bijector` (roughly) computes `Y = scale @ X.T + shift`. This
-  function infers the `batch_shape` and `event_shape` from the `scale` and
-  `shift` terms.
-
-  Args:
-    scale_oppd: Instance of OperatorPDBase subclass representing the `Affine`
-      `Bijector` scale matrix.
-    shift: `Tensor` representing the `shift` vector.
-
-  Returns:
-    batch_shape: 1D, integer `Tensor` representing the shape of batch
-      dimensions.
-    event_shape: 1D, integer `Tensor` representing the shape of event
-      dimensions.
-
-  Raises:
-    ValueError: if we are not able to infer batch/event shapes from the args.
-  """
-  # Collect known static shape.
-  def _has_static_ndims(x):
-    return x is not None and x.get_shape().ndims is not None
-  if _has_static_ndims(scale_oppd) and _has_static_ndims(shift):
-    batch_shape = scale_oppd.get_batch_shape().merge_with(
-        shift.get_shape()[:-1])
-    event_shape = scale_oppd.get_shape()[-1:].merge_with(
-        shift.get_shape()[-1:])
-  elif _has_static_ndims(scale_oppd):
-    batch_shape = scale_oppd.get_batch_shape()
-    event_shape = scale_oppd.get_shape()[-1:]
-  elif _has_static_ndims(shift):
-    batch_shape = shift.get_shape()[:-1]
-    event_shape = shift.get_shape()[-1:]
-  else:
-    batch_shape = tensor_shape.TensorShape(None)
-    event_shape = tensor_shape.TensorShape(None)
-
-  # Convert TensorShape to Tensors and see if we're done.
-  if batch_shape.is_fully_defined():
-    batch_shape = constant_op.constant(batch_shape.as_list(),
-                                       dtype=dtypes.int32)
-  else:
-    batch_shape = None
-  if event_shape.is_fully_defined():
-    event_shape = constant_op.constant(event_shape.as_list(),
-                                       dtype=dtypes.int32)
-  else:
-    event_shape = None
-  if batch_shape is not None and event_shape is not None:
-    return batch_shape, event_shape
-
-  # Collect known dynamic shape.
-  if scale_oppd is not None:
-    shape = scale_oppd.shape()
-  elif shift is not None:
-    shape = array_ops.shape(shift)
-  else:
-    raise ValueError("unable to infer batch_shape, event_shape")
-
-  # Fill in what we don't know.
-  if batch_shape is None:
-    batch_shape = array_ops.identity(shape[:-1], name="batch_shape")
-  if event_shape is None:
-    event_shape = array_ops.identity(shape[-1:], name="event_shape")
-
-  return batch_shape, event_shape
-
-
-class _VectorStudentT(transformed_distribution.TransformedDistribution):
-  """A vector version of Student's t-distribution on `R^k`.
-
-  #### Mathematical details
-
-  Write `S` for the scale matrix (in R^{k x k}) and `mu` for the mean (in R^k).
-  The PDF of this distribution is:
-
-  ```none
-  f(x) = (1 + y y.T / df)**(-0.5 (df + 1)) / Z
-  where,
-  y(x) = inv(S) (x - mu)
-  Z    = abs(det(S)) ( sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1)) )**k
-  ```
-
-  Notice that the matrix `S` has semantics more similar to standard deviation
-  than covariance.
-
-  This distribution is an Affine transformation of iid
-  [Student's t-distributions](
-  https://en.wikipedia.org/wiki/Student%27s_t-distribution)
-  and should not be confused with the [Multivate Student's t-distribution](
-  https://en.wikipedia.org/wiki/Multivariate_t-distribution).  The
-  traditional Multivariate Student's t-distribution is type of
-  [elliptical distribution](
-  https://en.wikipedia.org/wiki/Elliptical_distribution); it has PDF:
-
-  ```none
-  f(x) = (1 + y y.T / df)**(-0.5 (df + k)) / Z
-  where,
-  y(x) = inv(S) (x - mu)
-  Z    = abs(det(S)) sqrt(df pi)**k Gamma(0.5 df) / Gamma(0.5 (df + k))
-  ```
-
-  Notice that the Multivariate Student's t-distribution uses `k` where the
-  Vector Student's t-distribution has a `1`. Conversely the Vector version has a
-  broader application of the power-`k` in the normalization.
-
-  #### Examples
-
-  A single instance of a "Vector Student's t-distribution" is defined by a mean
-  vector of of length `k` and a scale matrix of shape `k x k`.
-
-  Extra leading dimensions, if provided, allow for batches.
-
-  ```python
-  ds = tf.contrib.distributions
-
-  # Initialize a single 3-variate vector Student's t-distribution.
-  mu = [1., 2, 3]
-  chol = [[1., 0, 0.],
-          [1, 3, 0],
-          [1, 2, 3]]
-  vt = ds.VectorStudentT(df=2, shift=mu, scale_tril=chol)
-
-  # Evaluate this on an observation in R^3, returning a scalar.
-  vt.prob([-1., 0, 1])
-
-  # Initialize a batch of two 3-variate vector Student's t-distributions.
-  mu = [[1., 2, 3],
-        [11, 22, 33]]
-  chol = ...  # shape 2 x 3 x 3, lower triangular, positive diagonal.
-  vt = ds.VectorStudentT(shift=mu, scale_tril=chol)
-
-  # Evaluate this on a two observations, each in R^3, returning a length two
-  # tensor.
-  x = [[-1, 0, 1],
-       [-11, 0, 11]]
-  vt.prob(x)
-  ```
-
-  For more examples of how to construct the `scale` matrix, see the
-  `bijector.Affine` docstring.
-
-  """
-
-  def __init__(self,
-               df,
-               shift=None,
-               scale_identity_multiplier=None,
-               scale_diag=None,
-               scale_tril=None,
-               scale_perturb_factor=None,
-               scale_perturb_diag=None,
-               validate_args=False,
-               allow_nan_stats=True,
-               name="VectorStudentT"):
-    """Instantiates the vector Student's t-distributions on `R^k`.
-
-    The `batch_shape` is the broadcast between `df.batch_shape` and
-    `Affine.batch_shape` where `Affine` is constructed from `shift` and
-    `scale_*` arguments.
-
-    The `event_shape` is the event shape of `Affine.event_shape`.
-
-    Args:
-      df: Numeric `Tensor`. The degrees of freedom of the distribution(s).
-        `df` must contain only positive values.
-        Must be scalar if `shift`, `scale_*` imply non-scalar batch_shape or
-        must have the same `batch_shape` implied by `shift`, `scale_*`.
-      shift: Numeric `Tensor`.  If this is set to `None`, no `shift` is applied.
-      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
-        scaling done to the identity matrix.
-        When `scale_identity_multiplier = scale_diag=scale_tril = None` then
-        `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
-        to `scale`.
-      scale_diag: Numeric `Tensor` representing the diagonal matrix.
-        `scale_diag` has shape [N1, N2, ... k], which represents a k x k
-        diagonal matrix.
-        When `None` no diagonal term is added to `scale`.
-      scale_tril: Numeric `Tensor` representing the diagonal matrix.
-        `scale_diag` has shape [N1, N2, ... k, k], which represents a k x k
-        lower triangular matrix.
-        When `None` no `scale_tril` term is added to `scale`.
-        The upper triangular elements above the diagonal are ignored.
-      scale_perturb_factor: Numeric `Tensor` representing factor matrix with
-        last two dimensions of shape `(k, r)`.
-        When `None`, no rank-r update is added to `scale`.
-      scale_perturb_diag: Numeric `Tensor` representing the diagonal matrix.
-        `scale_perturb_diag` has shape [N1, N2, ... r], which represents an
-        r x r Diagonal matrix.
-        When `None` low rank updates will take the form `scale_perturb_factor *
-        scale_perturb_factor.T`.
-      validate_args: `Boolean`, default `False`.  Whether to validate input
-        with asserts.  If `validate_args` is `False`, and the inputs are
-        invalid, correct behavior is not guaranteed.
-      allow_nan_stats: `Boolean`, default `True`.  If `False`, raise an
-        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
-        batch member If `True`, batch members with valid parameters leading to
-        undefined statistics will return NaN for this statistic.
-      name: The name to give Ops created by the initializer.
-    """
-    parameters = locals()
-    parameters.pop("self")
-    graph_parents = [df, shift, scale_identity_multiplier, scale_diag,
-                     scale_tril, scale_perturb_factor, scale_perturb_diag]
-    with ops.name_scope(name) as ns:
-      with ops.name_scope("init", values=graph_parents):
-        # The shape of the _VectorStudentT distribution is governed by the
-        # relationship between df.batch_shape and affine.batch_shape. In
-        # pseudocode the basic procedure is:
-        #   if df.batch_shape is scalar:
-        #     if affine.batch_shape is not scalar:
-        #       # broadcast self._distribution.sample so
-        #       # it has affine.batch_shape.
-        #     self.batch_shape = affine.batch_shape
-        #   else:
-        #     if affine.batch_shape is scalar:
-        #       # let affine broadcasting do its thing.
-        #     self.batch_shape = df.batch_shape
-        # All of the above magic is actually handled by TransformedDistribution.
-        # Here we really only need to collect the affine.batch_shape and decide
-        # what we're going to pass in to TransformedDistribution's
-        # (override) batch_shape arg.
-        self._distribution = student_t.StudentT(df=df, mu=0., sigma=1.)
-        self._affine = bijectors.Affine(
-            shift=shift,
-            scale_identity_multiplier=scale_identity_multiplier,
-            scale_diag=scale_diag,
-            scale_tril=scale_tril,
-            scale_perturb_factor=scale_perturb_factor,
-            scale_perturb_diag=scale_perturb_diag,
-            validate_args=validate_args)
-        self._batch_shape, self._override_event_shape = _infer_shapes(
-            self.scale, self.shift)
-        self._override_batch_shape = distribution_util.pick_vector(
-            self._distribution.is_scalar_batch(),
-            self._batch_shape,
-            constant_op.constant([], dtype=dtypes.int32))
-        super(_VectorStudentT, self).__init__(
-            distribution=self._distribution,
-            bijector=self._affine,
-            batch_shape=self._override_batch_shape,
-            event_shape=self._override_event_shape,
-            validate_args=validate_args,
-            name=ns)
-
-  @property
-  def df(self):
-    """Degrees of freedom in these Student's t distribution(s)."""
-    return self._distribution.df
-
-  @property
-  def shift(self):
-    """Locations of these Student's t distribution(s)."""
-    return self._affine.shift
-
-  @property
-  def scale(self):
-    """Dense (batch) covariance matrix, if available."""
-    return self._affine.scale