提交 7f0cd9c7 编写于 作者: G Gunhan Gulsoy 提交者: TensorFlower Gardener

Automated rollback of change 144338393

Change: 144354566
上级 747bbc69
......@@ -448,24 +448,6 @@ cuda_py_tests(
tags = ["nomsan"], # disable to avoid false positives from scipy.
)
# Test target for python/kernel_tests/vector_student_t_test.py. The test
# compares the distribution against a NumPy/SciPy reference, hence the numpy
# and scipy dependencies below.
# NOTE(review): the preceding target is tagged `nomsan` "to avoid false
# positives from scipy"; confirm whether this scipy-dependent target needs the
# same tag.
cuda_py_tests(
name = "vector_student_t_test",
size = "small",
srcs = ["python/kernel_tests/vector_student_t_test.py"],
additional_deps = [
":distributions_py",
":distributions_py_CYCLIC_DEPENDENCIES_THAT_NEED_TO_GO",
"//third_party/py/numpy",
"//third_party/py/scipy",
"//tensorflow/python:array_ops",
"//tensorflow/python:client_testlib",
"//tensorflow/python:framework",
"//tensorflow/python:framework_for_generated_wrappers",
"//tensorflow/python:framework_test_lib",
"//tensorflow/python:platform_test",
],
)
cuda_py_tests(
name = "uniform_test",
size = "small",
......
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for MultivariateStudentsT Distribution."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from scipy import linalg
from scipy import special
from tensorflow.contrib.distributions.python.ops.vector_student_t import _VectorStudentT
from tensorflow.python.framework import dtypes
from tensorflow.python.ops import array_ops
from tensorflow.python.platform import test
class _FakeVectorStudentT(object):
  """NumPy/SciPy reference implementation of the vector Student's t-distribution.

  Technically we don't need to test the `Vector Student's t-distribution` since
  it's composed of only unit-tested parts. However this _FakeVectorStudentT
  serves as something like an end-to-end test of the
  `TransformedDistribution + Affine` API.

  Other `Vector*` implementations need only test new code. That we don't need
  to test every Vector* distribution is good because there aren't SciPy
  analogues and reimplementing everything in NumPy sort of defeats the point of
  having the `TransformedDistribution + Affine` API.

  Args:
    df: Degrees of freedom; either a scalar or a length-`b` vector (one entry
      per batch member).
    shift: Location vector of length `k` (scalar `df`) or a `[b, k]` array.
    scale_tril: Lower-triangular scale matrix of shape `[k, k]` (scalar `df`)
      or a `[b, k, k]` array.
  """

  def __init__(self, df, shift, scale_tril):
    self._df = np.asarray(df)
    self._shift = np.asarray(shift)
    self._scale_tril = np.asarray(scale_tril)

  def log_prob(self, x):
    """Returns the log-density evaluated at `x`.

    Args:
      x: Array of shape `[k]` or `[n, k]` when `df` is scalar, otherwise of
        shape `[n, b, k]`.

    Returns:
      Array of shape `[n]` when `df` is scalar (`[1]` for a single point),
      otherwise of shape `[n, b]`.
    """

    def _compute(df, shift, scale_tril, x):
      """Log-density of a single (non-batched) distribution."""
      k = scale_tril.shape[-1]
      # log|det(scale_tril)|: for a triangular matrix only the diagonal counts.
      ildj = np.sum(np.log(np.abs(np.diag(scale_tril))), axis=-1)
      logz = ildj + k * (0.5 * np.log(df) +
                         0.5 * np.log(np.pi) +
                         special.gammaln(0.5 * df) -
                         special.gammaln(0.5 * (df + 1.)))
      # Use a plain 2-D ndarray of column vectors. The deprecated `np.matrix`
      # would turn `**` into a matrix power if it ever reached `y**2.`.
      centered = np.atleast_2d(x - shift).T
      y = linalg.solve_triangular(scale_tril, centered,
                                  lower=True, overwrite_b=True)
      # Sum the per-coordinate terms over the event (row) axis.
      logs = -0.5 * (df + 1.) * np.sum(np.log1p(y**2. / df), axis=-2)
      return logs - logz

    if not self._df.shape:
      # Scalar `df`: a single distribution, no batch axis to iterate over.
      return _compute(self._df, self._shift, self._scale_tril, x)
    # One row per batch member, transposed so the result is `[n, b]`.
    return np.array([
        _compute(df_i, self._shift[i], self._scale_tril[i], x[:, i, :])
        for i, df_i in enumerate(self._df)]).T

  def prob(self, x):
    """Returns the density evaluated at `x` (see `log_prob` for shapes)."""
    return np.exp(self.log_prob(x))
class VectorStudentTTest(test.TestCase):
  """Checks `_VectorStudentT` against the `_FakeVectorStudentT` reference.

  Each scenario is run with statically known parameters and, in the
  `*Dynamic` variants, with parameters fed through placeholders so that the
  shapes are only known at graph execution time.
  """

  def setUp(self):
    # Fixed seed so the evaluation points are reproducible.
    self._rng = np.random.RandomState(42)

  def _rand_x(self, *shape):
    """Draws float32 evaluation points of the given shape from [-1, 1)."""
    return 2. * self._rng.rand(*shape).astype(np.float32) - 1.

  def _batch_affine_params(self):
    """Returns `(shift, scale_diag, scale_tril)` for a batch of three."""
    shift = np.asarray([[0., 0, 0],
                        [1, 2, 3],
                        [1, 0, 1]],
                       dtype=np.float32)
    scale_diag = np.asarray([[1., 2, 3],
                             [2, 3, 4],
                             [4, 5, 6]],
                            dtype=np.float32)
    scale_tril = np.array([np.diag(row) for row in scale_diag])
    return shift, scale_diag, scale_tril

  def _scalar_affine_params(self):
    """Returns `(shift, scale_diag, scale_tril)` without a batch dimension."""
    shift = np.asarray([1, 2, 3], dtype=np.float32)
    scale_diag = np.asarray([2, 3, 4], dtype=np.float32)
    return shift, scale_diag, np.diag(scale_diag)

  def _assert_probs_match(self, expected_mst, actual_mst, x, feed_dict=None):
    """Asserts `log_prob` and `prob` agree with the reference at `x`."""
    self.assertAllClose(expected_mst.log_prob(x),
                        actual_mst.log_prob(x).eval(feed_dict=feed_dict),
                        rtol=0., atol=1e-5)
    self.assertAllClose(expected_mst.prob(x),
                        actual_mst.prob(x).eval(feed_dict=feed_dict),
                        rtol=0., atol=1e-5)

  def _check_static(self, expected_mst, df, shift, scale_diag, x):
    """Builds `_VectorStudentT` from NumPy values and compares it."""
    with self.test_session():
      actual_mst = _VectorStudentT(df=df, shift=shift, scale_diag=scale_diag,
                                   validate_args=True)
      self._assert_probs_match(expected_mst, actual_mst, x)

  def _check_dynamic(self, expected_mst, df, shift, scale_diag, x):
    """Builds `_VectorStudentT` from placeholders and compares it.

    The placeholders carry no static shape, and they (not the NumPy values)
    are handed to the distribution, so shapes must be resolved when the graph
    runs. Passing the NumPy values here would silently turn this into a copy
    of the static test.
    """
    with self.test_session():
      df_pl = array_ops.placeholder(dtypes.float32, name="df")
      shift_pl = array_ops.placeholder(dtypes.float32, name="shift")
      scale_diag_pl = array_ops.placeholder(dtypes.float32, name="scale_diag")
      feed_dict = {df_pl: df, shift_pl: shift, scale_diag_pl: scale_diag}
      actual_mst = _VectorStudentT(df=df_pl, shift=shift_pl,
                                   scale_diag=scale_diag_pl,
                                   validate_args=True)
      self._assert_probs_match(expected_mst, actual_mst, x,
                               feed_dict=feed_dict)

  def testProbStaticScalar(self):
    # Scalar batch_shape for both df and the affine parameters.
    df = np.asarray(3., dtype=np.float32)
    shift = np.asarray([1], dtype=np.float32)
    scale_diag = np.asarray([2.], dtype=np.float32)
    expected_mst = _FakeVectorStudentT(
        df=df, shift=shift, scale_tril=np.diag(scale_diag))
    x = self._rand_x(4, 1)
    self._check_static(expected_mst, df, shift, scale_diag, x)

  def testProbStatic(self):
    # Non-scalar batch_shape for both df and the affine parameters.
    df = np.asarray([1., 2, 3], dtype=np.float32)
    shift, scale_diag, scale_tril = self._batch_affine_params()
    x = self._rand_x(4, 3, 3)
    expected_mst = _FakeVectorStudentT(
        df=df, shift=shift, scale_tril=scale_tril)
    self._check_static(expected_mst, df, shift, scale_diag, x)

  def testProbDynamic(self):
    # Non-scalar batch_shape for both df and the affine parameters.
    df = np.asarray([1., 2, 3], dtype=np.float32)
    shift, scale_diag, scale_tril = self._batch_affine_params()
    x = self._rand_x(4, 3, 3)
    expected_mst = _FakeVectorStudentT(
        df=df, shift=shift, scale_tril=scale_tril)
    self._check_dynamic(expected_mst, df, shift, scale_diag, x)

  def testProbScalarBaseDistributionNonScalarTransform(self):
    # Scalar df batch_shape, non-scalar affine batch_shape.
    df = np.asarray(2., dtype=np.float32)
    shift, scale_diag, scale_tril = self._batch_affine_params()
    x = self._rand_x(4, 3, 3)
    expected_mst = _FakeVectorStudentT(
        df=np.tile(df, len(scale_diag)),
        shift=shift,
        scale_tril=scale_tril)
    self._check_static(expected_mst, df, shift, scale_diag, x)

  def testProbScalarBaseDistributionNonScalarTransformDynamic(self):
    # Scalar df batch_shape, non-scalar affine batch_shape.
    df = np.asarray(2., dtype=np.float32)
    shift, scale_diag, scale_tril = self._batch_affine_params()
    x = self._rand_x(4, 3, 3)
    expected_mst = _FakeVectorStudentT(
        df=np.tile(df, len(scale_diag)),
        shift=shift,
        scale_tril=scale_tril)
    self._check_dynamic(expected_mst, df, shift, scale_diag, x)

  def testProbNonScalarBaseDistributionScalarTransform(self):
    # Non-scalar df batch_shape, scalar affine batch_shape.
    df = np.asarray([1., 2., 3.], dtype=np.float32)
    shift, scale_diag, scale_tril = self._scalar_affine_params()
    x = self._rand_x(4, 3, 3)
    expected_mst = _FakeVectorStudentT(
        df=df,
        shift=np.tile(shift[None, :], [len(df), 1]),
        scale_tril=np.tile(scale_tril[None, :, :], [len(df), 1, 1]))
    self._check_static(expected_mst, df, shift, scale_diag, x)

  def testProbNonScalarBaseDistributionScalarTransformDynamic(self):
    # Non-scalar df batch_shape, scalar affine batch_shape.
    df = np.asarray([1., 2., 3.], dtype=np.float32)
    shift, scale_diag, scale_tril = self._scalar_affine_params()
    x = self._rand_x(4, 3, 3)
    expected_mst = _FakeVectorStudentT(
        df=df,
        shift=np.tile(shift[None, :], [len(df), 1]),
        scale_tril=np.tile(scale_tril[None, :, :], [len(df), 1, 1]))
    self._check_dynamic(expected_mst, df, shift, scale_diag, x)
if __name__ == "__main__":
test.main()
......@@ -1590,7 +1590,6 @@ class Affine(Bijector):
`scale_tril` has shape [N1, N2, ... k, k], which represents a k x k
lower triangular matrix.
When `None` no `scale_tril` term is added to `scale`.
The upper triangular elements above the diagonal are ignored.
scale_perturb_factor: Numeric `Tensor` representing factor matrix with
last two dimensions of shape `(k, r)`.
When `None`, no rank-r update is added to `scale`.
......
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Vector Student's t distribution classes."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tensorflow.contrib.distributions.python.ops import bijector as bijectors
from tensorflow.contrib.distributions.python.ops import distribution_util
from tensorflow.contrib.distributions.python.ops import student_t
from tensorflow.contrib.distributions.python.ops import transformed_distribution
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
# TODO(jvdillon): Add unittests for this once we know where we will put this
# code and how it will generally be used. In the interim this code is tested
# via the _VectorStudentT tests.
def _infer_shapes(scale_oppd, shift):
  """Helper which returns batch_shape, event_shape from `Affine` properties.

  The `Affine` `Bijector` (roughly) computes `Y = scale @ X.T + shift`. This
  function infers the `batch_shape` and `event_shape` from the `scale` and
  `shift` terms. Static shape information is used where it is fully defined;
  anything still unknown is read from the dynamic shape, preferring
  `scale_oppd` over `shift`.

  Args:
    scale_oppd: Instance of OperatorPDBase subclass representing the `Affine`
      `Bijector` scale matrix.
    shift: `Tensor` representing the `shift` vector.

  Returns:
    batch_shape: 1D, integer `Tensor` representing the shape of batch
      dimensions.
    event_shape: 1D, integer `Tensor` representing the shape of event
      dimensions.

  Raises:
    ValueError: if we are not able to infer batch/event shapes from the args.
  """

  def _has_static_ndims(x):
    return x is not None and x.get_shape().ndims is not None

  # Collect known static shape.
  if _has_static_ndims(scale_oppd) and _has_static_ndims(shift):
    batch_shape = scale_oppd.get_batch_shape().merge_with(
        shift.get_shape()[:-1])
    event_shape = scale_oppd.get_shape()[-1:].merge_with(
        shift.get_shape()[-1:])
  elif _has_static_ndims(scale_oppd):
    batch_shape = scale_oppd.get_batch_shape()
    event_shape = scale_oppd.get_shape()[-1:]
  elif _has_static_ndims(shift):
    batch_shape = shift.get_shape()[:-1]
    event_shape = shift.get_shape()[-1:]
  else:
    batch_shape = tensor_shape.TensorShape(None)
    event_shape = tensor_shape.TensorShape(None)

  # Convert TensorShape to Tensors and see if we're done.
  if batch_shape.is_fully_defined():
    batch_shape = constant_op.constant(batch_shape.as_list(),
                                       dtype=dtypes.int32)
  else:
    batch_shape = None

  if event_shape.is_fully_defined():
    event_shape = constant_op.constant(event_shape.as_list(),
                                       dtype=dtypes.int32)
  else:
    event_shape = None

  if batch_shape is not None and event_shape is not None:
    return batch_shape, event_shape

  # Collect known dynamic shape.
  if scale_oppd is not None:
    shape = scale_oppd.shape()
    # The operator is a (batch of) k x k matrices, so both trailing dimensions
    # must be dropped to obtain the batch shape; this mirrors the static
    # `get_batch_shape()` used above. Dropping only one would leave a `k` in
    # the batch shape.
    # NOTE(review): assumes `shape()` returns [N1, ..., Nn, k, k] -- confirm
    # against OperatorPDBase.
    dynamic_batch_shape = shape[:-2]
  elif shift is not None:
    shape = array_ops.shape(shift)
    # `shift` is a (batch of) length-k vectors: only the last dim is event.
    dynamic_batch_shape = shape[:-1]
  else:
    raise ValueError("unable to infer batch_shape, event_shape")

  # Fill in what we don't know.
  if batch_shape is None:
    batch_shape = array_ops.identity(dynamic_batch_shape, name="batch_shape")

  if event_shape is None:
    event_shape = array_ops.identity(shape[-1:], name="event_shape")

  return batch_shape, event_shape
class _VectorStudentT(transformed_distribution.TransformedDistribution):
  """A vector version of Student's t-distribution on `R^k`.

  #### Mathematical details

  Write `S` for the scale matrix (in R^{k x k}) and `mu` for the mean (in R^k).
  The PDF of this distribution is:

  ```none
  f(x) = prod_i (1 + y_i**2 / df)**(-0.5 (df + 1)) / Z
  where,
  y(x) = inv(S) (x - mu)
  Z = abs(det(S)) ( sqrt(df pi) Gamma(0.5 df) / Gamma(0.5 (df + 1)) )**k
  ```

  Notice that the matrix `S` has semantics more similar to standard deviation
  than covariance.

  This distribution is an Affine transformation of iid
  [Student's t-distributions](
  https://en.wikipedia.org/wiki/Student%27s_t-distribution)
  and should not be confused with the [Multivariate Student's t-distribution](
  https://en.wikipedia.org/wiki/Multivariate_t-distribution). The
  traditional Multivariate Student's t-distribution is a type of
  [elliptical distribution](
  https://en.wikipedia.org/wiki/Elliptical_distribution); it has PDF:

  ```none
  f(x) = (1 + y y.T / df)**(-0.5 (df + k)) / Z
  where,
  y(x) = inv(S) (x - mu)
  Z = abs(det(S)) sqrt(df pi)**k Gamma(0.5 df) / Gamma(0.5 (df + k))
  ```

  Notice that the Multivariate Student's t-distribution uses `k` where the
  Vector Student's t-distribution has a `1`. Conversely the Vector version has a
  broader application of the power-`k` in the normalization.

  #### Examples

  A single instance of a "Vector Student's t-distribution" is defined by a mean
  vector of length `k` and a scale matrix of shape `k x k`.

  Extra leading dimensions, if provided, allow for batches.

  ```python
  ds = tf.contrib.distributions

  # Initialize a single 3-variate vector Student's t-distribution.
  mu = [1., 2, 3]
  chol = [[1., 0, 0.],
          [1, 3, 0],
          [1, 2, 3]]
  vt = ds.VectorStudentT(df=2, shift=mu, scale_tril=chol)

  # Evaluate this on an observation in R^3, returning a scalar.
  vt.prob([-1., 0, 1])

  # Initialize a batch of two 3-variate vector Student's t-distributions.
  mu = [[1., 2, 3],
        [11, 22, 33]]
  chol = ...  # shape 2 x 3 x 3, lower triangular, positive diagonal.
  vt = ds.VectorStudentT(df=2, shift=mu, scale_tril=chol)

  # Evaluate this on two observations, each in R^3, returning a length two
  # tensor.
  x = [[-1, 0, 1],
       [-11, 0, 11]]
  vt.prob(x)
  ```

  For more examples of how to construct the `scale` matrix, see the
  `bijector.Affine` docstring.
  """

  def __init__(self,
               df,
               shift=None,
               scale_identity_multiplier=None,
               scale_diag=None,
               scale_tril=None,
               scale_perturb_factor=None,
               scale_perturb_diag=None,
               validate_args=False,
               allow_nan_stats=True,
               name="VectorStudentT"):
    """Instantiates the vector Student's t-distributions on `R^k`.

    The `batch_shape` is the broadcast between `df.batch_shape` and
    `Affine.batch_shape` where `Affine` is constructed from `shift` and
    `scale_*` arguments.

    The `event_shape` is the event shape of `Affine.event_shape`.

    Args:
      df: Numeric `Tensor`. The degrees of freedom of the distribution(s).
        `df` must contain only positive values.
        Must be scalar if `shift`, `scale_*` imply non-scalar batch_shape or
        must have the same `batch_shape` implied by `shift`, `scale_*`.
      shift: Numeric `Tensor`.  If this is set to `None`, no `shift` is applied.
      scale_identity_multiplier: floating point rank 0 `Tensor` representing a
        scaling done to the identity matrix.
        When `scale_identity_multiplier = scale_diag=scale_tril = None` then
        `scale += IdentityMatrix`. Otherwise no scaled-identity-matrix is added
        to `scale`.
      scale_diag: Numeric `Tensor` representing the diagonal matrix.
        `scale_diag` has shape [N1, N2, ... k], which represents a k x k
        diagonal matrix.
        When `None` no diagonal term is added to `scale`.
      scale_tril: Numeric `Tensor` representing the lower triangular matrix.
        `scale_tril` has shape [N1, N2, ... k, k], which represents a k x k
        lower triangular matrix.
        When `None` no `scale_tril` term is added to `scale`.
        The upper triangular elements above the diagonal are ignored.
      scale_perturb_factor: Numeric `Tensor` representing factor matrix with
        last two dimensions of shape `(k, r)`.
        When `None`, no rank-r update is added to `scale`.
      scale_perturb_diag: Numeric `Tensor` representing the diagonal matrix.
        `scale_perturb_diag` has shape [N1, N2, ... r], which represents an
        r x r Diagonal matrix.
        When `None` low rank updates will take the form `scale_perturb_factor *
        scale_perturb_factor.T`.
      validate_args: `Boolean`, default `False`.  Whether to validate input
        with asserts. If `validate_args` is `False`, and the inputs are
        invalid, correct behavior is not guaranteed.
      allow_nan_stats: `Boolean`, default `True`.  If `False`, raise an
        exception if a statistic (e.g. mean/mode/etc...) is undefined for any
        batch member. If `True`, batch members with valid parameters leading to
        undefined statistics will return NaN for this statistic.
      name: The name to give Ops created by the initializer.
    """
    # Snapshot the constructor arguments before any other local is bound.
    parameters = dict(locals())
    parameters.pop("self")
    # On Python 3, methods that reference `super` also expose `__class__` in
    # locals(); it is not a constructor argument.
    parameters.pop("__class__", None)
    graph_parents = [df, shift, scale_identity_multiplier, scale_diag,
                     scale_tril, scale_perturb_factor, scale_perturb_diag]
    with ops.name_scope(name) as ns:
      with ops.name_scope("init", values=graph_parents):
        # The shape of the _VectorStudentT distribution is governed by the
        # relationship between df.batch_shape and affine.batch_shape. In
        # pseudocode the basic procedure is:
        #   if df.batch_shape is scalar:
        #     if affine.batch_shape is not scalar:
        #       # broadcast self._distribution.sample so
        #       # it has affine.batch_shape.
        #     self.batch_shape = affine.batch_shape
        #   else:
        #     if affine.batch_shape is scalar:
        #       # let affine broadcasting do its thing.
        #     self.batch_shape = df.batch_shape
        # All of the above magic is actually handled by TransformedDistribution.
        # Here we really only need to collect the affine.batch_shape and decide
        # what we're going to pass in to TransformedDistribution's
        # (override) batch_shape arg.
        # NOTE(review): `allow_nan_stats` is accepted but not forwarded to
        # anything constructed here -- confirm whether that is intended.
        self._distribution = student_t.StudentT(df=df, mu=0., sigma=1.)
        self._affine = bijectors.Affine(
            shift=shift,
            scale_identity_multiplier=scale_identity_multiplier,
            scale_diag=scale_diag,
            scale_tril=scale_tril,
            scale_perturb_factor=scale_perturb_factor,
            scale_perturb_diag=scale_perturb_diag,
            validate_args=validate_args)
        self._batch_shape, self._override_event_shape = _infer_shapes(
            self.scale, self.shift)
        # Only override the batch shape when the base distribution is scalar;
        # otherwise df already carries the batch shape.
        self._override_batch_shape = distribution_util.pick_vector(
            self._distribution.is_scalar_batch(),
            self._batch_shape,
            constant_op.constant([], dtype=dtypes.int32))
        super(_VectorStudentT, self).__init__(
            distribution=self._distribution,
            bijector=self._affine,
            batch_shape=self._override_batch_shape,
            event_shape=self._override_event_shape,
            validate_args=validate_args,
            name=ns)
        # Record this class's own constructor arguments. Without this the
        # captured `parameters` dict is discarded and the instance reports
        # whatever the parent constructor recorded.
        self._parameters = parameters

  @property
  def df(self):
    """Degrees of freedom in these Student's t distribution(s)."""
    return self._distribution.df

  @property
  def shift(self):
    """Locations of these Student's t distribution(s)."""
    return self._affine.shift

  @property
  def scale(self):
    """Scale (`S`) of the underlying `Affine` bijector, as that bijector holds it."""
    return self._affine.scale
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册