Merge pull request #1156 from jburnim/r0.12
Prepare branch for 0.12.0-rc1 release
jburnim authored Nov 11, 2020
2 parents 7d58365 + 20ba309 commit 7784466
Showing 24 changed files with 254 additions and 218 deletions.
2 changes: 1 addition & 1 deletion spinoffs/oryx/oryx/version.py
@@ -17,7 +17,7 @@
# We follow Semantic Versioning (https://semver.org/)
_MAJOR_VERSION = '0'
_MINOR_VERSION = '1'
-_PATCH_VERSION = '2'
+_PATCH_VERSION = '3'

# When building releases, we can update this value on the release branch to
# reflect the current release candidate ('rc0', 'rc1') or, finally, the official
7 changes: 3 additions & 4 deletions spinoffs/oryx/setup.py
@@ -20,11 +20,10 @@

REQUIRED_PACKAGES = [
'dataclasses;python_version<"3.7"',
-'jax==0.2.0',
-'jaxlib==0.1.55',
+'jax==0.2.5',
+'jaxlib==0.1.56',
# Pin a TF version while TFP-on-JAX still depends on TF
-'tfp-nightly==0.12.0.dev20200923',
-'inference_gym',
+'tfp-nightly==0.12.0.dev20201107',
]


1 change: 1 addition & 0 deletions tensorflow_probability/python/bijectors/BUILD
@@ -130,6 +130,7 @@ multi_substrate_py_library(
"//tensorflow_probability/python/internal:name_util",
"//tensorflow_probability/python/internal:nest_util",
"//tensorflow_probability/python/internal:tensorshape_util",
+"//tensorflow_probability/python/math:gradient",
],
)

19 changes: 19 additions & 0 deletions tensorflow_probability/python/bijectors/bijector.py
@@ -32,6 +32,7 @@
from tensorflow_probability.python.internal import name_util
from tensorflow_probability.python.internal import nest_util
from tensorflow_probability.python.internal import prefer_static as ps
+from tensorflow_probability.python.math import gradient
from tensorflow.python.util import nest # pylint: disable=g-direct-tensorflow-import


@@ -604,6 +605,11 @@ def _is_injective(self):
"""
return True

+@property
+def _is_scalar(self):
+return (tf.get_static_value(self._forward_min_event_ndims) == 0 and
+tf.get_static_value(self._inverse_min_event_ndims) == 0)
+
@property
def validate_args(self):
"""Returns True if Tensor arguments will be validated."""
@@ -1033,6 +1039,8 @@ def _call_inverse_log_det_jacobian(self, y, event_ndims, name, **kwargs):
elif hasattr(self, '_forward_log_det_jacobian'):
x = self.inverse(y, **kwargs) # Fall back to computing `-fldj(x)`
ildj = attrs['ildj'] = -self._forward_log_det_jacobian(x, **kwargs)
+elif self._is_scalar:
+ildj = _autodiff_log_det_jacobian(self._inverse, y)
else:
raise NotImplementedError(
'Neither _forward_log_det_jacobian nor _inverse_log_det_jacobian '
@@ -1136,6 +1144,8 @@ def _call_forward_log_det_jacobian(self, x, event_ndims, name, **kwargs):
elif hasattr(self, '_inverse_log_det_jacobian'):
y = self.forward(x, **kwargs) # Fall back to computing `ildj(y)`
ildj = attrs['ildj'] = self._inverse_log_det_jacobian(y, **kwargs)
+elif self._is_scalar:
+ildj = -_autodiff_log_det_jacobian(self._forward, x)
else:
raise NotImplementedError(
'Neither _forward_log_det_jacobian nor _inverse_log_det_jacobian '
@@ -1670,3 +1680,12 @@ def ldj_reduction_shape(shape_structure,
'LDJ reduction shape.')))

return ldj_reduce_shape, assertions


+def _autodiff_log_det_jacobian(fn, x):
+"""Automatically compute the log det jacobian of a scalar function."""
+_, grads = gradient.value_and_gradient(fn, x)
+if grads is None:
+raise ValueError('Cannot compute log det jacobian; function {} has `None` '
+'gradient.'.format(fn))
+return tf.math.log(tf.abs(grads))
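
For orientation, a hedged sketch that is not part of this commit: with the `_is_scalar` check added above, a scalar bijector that implements only `_forward` and `_inverse` can now report log-det-Jacobians through autodiff, since for a scalar map y = f(x) the forward log det Jacobian is simply log|f'(x)|. The `Cube` bijector below is hypothetical and assumes TFP >= 0.12.

# Hypothetical example (not from this commit): a scalar bijector with no
# explicit Jacobian methods now gets its LDJ via autodiff of `_forward`.
import tensorflow as tf
import tensorflow_probability as tfp

class Cube(tfp.bijectors.Bijector):
  """y = x**3; defines only _forward and _inverse."""

  def __init__(self, name='cube'):
    parameters = dict(locals())
    super(Cube, self).__init__(
        forward_min_event_ndims=0,
        inverse_min_event_ndims=0,
        parameters=parameters,
        name=name)

  def _forward(self, x):
    return x ** 3.

  def _inverse(self, y):
    return tf.sign(y) * tf.abs(y) ** (1. / 3.)

b = Cube()
x = tf.constant([0.5, 2.0])
# Before this change the call below raised NotImplementedError; now it
# returns log|3 * x**2| computed by differentiating `_forward`.
print(b.forward_log_det_jacobian(x, event_ndims=0))
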
51 changes: 49 additions & 2 deletions tensorflow_probability/python/bijectors/bijector_test.py
@@ -80,12 +80,12 @@ def __init__(self):

with self.assertRaisesRegexp(
NotImplementedError,
-'Neither _forward_log_det_jacobian nor _inverse_log_det_jacobian.*'):
+'inverse not implemented'):
bij.inverse_log_det_jacobian(0, event_ndims=0)

with self.assertRaisesRegexp(
NotImplementedError,
-'Neither _forward_log_det_jacobian nor _inverse_log_det_jacobian.*'):
+'forward not implemented'):
bij.forward_log_det_jacobian(0, event_ndims=0)

@test_util.disable_test_for_backend(
@@ -124,6 +124,53 @@ def _forward(self, x):
error_clazz, 'Tensor conversion requested dtype'):
b64.forward(x32)

+@test_util.numpy_disable_gradient_test
+def testAutodiffLogDetJacobian(self):
+
+class NoJacobianBijector(tfb.Bijector):
+"""Bijector with no log det jacobian methods."""
+
+def __init__(self, scale=2.):
+parameters = dict(locals())
+self._scale = tensor_util.convert_nonref_to_tensor(scale)
+super(NoJacobianBijector, self).__init__(
+validate_args=True,
+forward_min_event_ndims=0,
+parameters=parameters)
+
+def _forward(self, x):
+return tf.exp(self._scale * x)
+
+def _inverse(self, y):
+return tf.math.log(y) / self._scale
+
+b = NoJacobianBijector(scale=1.4)
+x = tf.convert_to_tensor([2., -3.])
+[
+fldj,
+true_fldj,
+ildj
+] = self.evaluate([
+b.forward_log_det_jacobian(x, event_ndims=0),
+tf.math.log(b._scale) + b._scale * x,
+b.inverse_log_det_jacobian(b.forward(x), event_ndims=0)
+])
+self.assertAllClose(fldj, true_fldj)
+self.assertAllClose(fldj, -ildj)
+
+y = tf.convert_to_tensor([27., 5.])
+[
+ildj,
+true_ildj,
+fldj
+] = self.evaluate([
+b.inverse_log_det_jacobian(y, event_ndims=0),
+-tf.math.log(tf.abs(y * b._scale)),
+b.forward_log_det_jacobian(b.inverse(y), event_ndims=0)
+])
+self.assertAllClose(ildj, true_ildj)
+self.assertAllClose(ildj, -fldj)
+

class IntentionallyMissingError(Exception):
pass
2 changes: 2 additions & 0 deletions tensorflow_probability/python/bijectors/glow.py
@@ -205,6 +205,8 @@ class Glow(chain.Chain):
from functools import reduce
from operator import mul
import tensorflow as tf
import tensorflow_datasets as tfds
import tensorflow_probability as tfp
tfb = tfp.bijectors
tfd = tfp.distributions
@@ -219,8 +219,10 @@ def __init__(
"""
parameters = dict(locals())
with tf.name_scope(name or 'JointDistributionCoroutine') as name:
-self._sample_dtype = sample_dtype
self._model_coroutine = model
+# Hint `no_dependency` to tell tf.Module not to screw up the sample dtype
+# with extraneous wrapping (list => ListWrapper, etc.).
+self._sample_dtype = self._no_dependency(sample_dtype)
self._single_sample_distributions = {}
super(JointDistributionCoroutine, self).__init__(
dtype=sample_dtype,
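
As an aside (not part of the diff): the ListWrapper issue this `no_dependency` hint avoids comes from `tf.Module`'s attribute auto-tracking, which converts a plain `list` assigned to an attribute into a tracked `ListWrapper` and thereby changes the structure's type. A minimal sketch of that behavior, assuming the private `_no_dependency` hook behaves the way the constructor above relies on:

# Illustrative sketch (not from this commit) of tf.Module's list wrapping.
import tensorflow as tf

class Holder(tf.Module):

  def __init__(self, structure):
    super(Holder, self).__init__()
    self.tracked = structure                         # auto-tracked: list -> ListWrapper
    self.untracked = self._no_dependency(structure)  # stored as the original list

h = Holder([None, None])
print(type(h.tracked).__name__)    # 'ListWrapper', no longer a plain list
print(type(h.untracked).__name__)  # 'list'
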
@@ -624,6 +624,7 @@ def noncentered_horseshoe_prior(num_features):
tfd.Sample(tfd.Normal(0., 1.), num_features))
yield tfd.Independent(tfd.Deterministic(weights_noncentered * scale),
reinterpreted_batch_ndims=1)

# Currently sample_dtype is only used for `tf.nest.pack_structure_as`. In
# the future we may use it for error checking and/or casting.
sample_dtype = collections.namedtuple('Model', [
@@ -645,6 +646,18 @@ def noncentered_horseshoe_prior(num_features):
self.assertEqual([3, 4], joint.log_prob(
joint.sample([3, 4], seed=test_util.test_seed())).shape)

+# Check that a list dtype doesn't get corrupted by `tf.Module` wrapping.
+sample_dtype = [None, None, None, None]
+joint = tfd.JointDistributionCoroutine(
+lambda: noncentered_horseshoe_prior(4),
+sample_dtype=sample_dtype,
+validate_args=True)
+ds, xs = joint.sample_distributions([2, 3], seed=test_util.test_seed())
+self.assertEqual(type(sample_dtype), type(xs))
+self.assertEqual(type(sample_dtype), type(ds))
+tf.nest.assert_same_structure(sample_dtype, ds)
+tf.nest.assert_same_structure(sample_dtype, xs)
+
def test_repr_with_custom_sample_dtype(self):
def model():
s = yield tfd.JointDistributionCoroutine.Root(
@@ -122,7 +122,7 @@
'BetaBinomial': 1e-5,
'CholeskyLKJ': 1e-4,
'LKJ': 1e-3,
-'PowerSpherical': 1e-5,
+'PowerSpherical': 2e-5,
})

VECTORIZED_LOGPROB_RTOL = collections.defaultdict(lambda: 1e-6)
4 changes: 2 additions & 2 deletions tensorflow_probability/python/distributions/sample.py
@@ -327,9 +327,9 @@ def _parameter_control_dependencies(self, is_init):

return assertions

-_composite_tensor_nonshape_params = ('distribution,')
+_composite_tensor_nonshape_params = ('distribution',)

-_composite_tensor_shape_params = ('sample_shape,')
+_composite_tensor_shape_params = ('sample_shape',)


class _DefaultSampleBijector(bijector_lib.Bijector):
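
The fix above is easy to misread, so a one-line illustration (not in the diff): without a comma outside the quotes, `('distribution,')` is just a parenthesized string, not a 1-tuple, which is why the parameter-name tuples were silently wrong before this change.

# Not part of the commit: the tuple-vs-string slip corrected above.
old = ('distribution,')   # parentheses alone do nothing: this is the str 'distribution,'
new = ('distribution',)   # the trailing comma makes it a 1-tuple
print(type(old).__name__, old)   # -> str distribution,
print(type(new).__name__, new)   # -> tuple ('distribution',)
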
@@ -126,9 +126,9 @@ def test_in_with_reductions(self):
)
pkr = reduced_kernel.bootstrap_results(8)
_, kernel_results = reduced_kernel.one_step(8, pkr)
-streaming_calculations = self.evaluate(
-mean_reducer.finalize(kernel_results.streaming_calculations))
-self.assertEqual(9, streaming_calculations)
+reduction_results = self.evaluate(
+mean_reducer.finalize(kernel_results.reduction_results))
+self.assertEqual(9, reduction_results)

def test_in_step_kernel(self):
fake_kernel = test_fixtures.TestTransitionKernel()
@@ -142,9 +142,9 @@ def test_in_step_kernel(self):
kernel=reduced_kernel,
return_final_kernel_results=True,
)
-streaming_calculations = self.evaluate(
-mean_reducer.finalize(kernel_results.streaming_calculations))
-self.assertEqual(11, streaming_calculations)
+reduction_results = self.evaluate(
+mean_reducer.finalize(kernel_results.reduction_results))
+self.assertEqual(11, reduction_results)


if __name__ == '__main__':
@@ -77,7 +77,7 @@ def _process_results(self):
reducers,
lambda r, s: r.finalize(s),
reducers,
-unnest.get_outermost(self.results, 'streaming_calculations'),
+unnest.get_outermost(self.results, 'reduction_results'),
check_types=False)

# Grab useful reductions.
@@ -35,7 +35,7 @@


PotentialScaleReductionReducerState = collections.namedtuple(
-'PotentialScaleReductionReducerState', 'init_state, rhat_state')
+'PotentialScaleReductionReducerState', 'rhat_state')


class PotentialScaleReductionReducer(reducer_base.Reducer):
@@ -114,18 +114,21 @@ def initialize(self, initial_chain_state, initial_kernel_results=None):
mcmc_util.make_name(
self.name, 'potential_scale_reduction_reducer', 'initialize')):
initial_chain_state = tf.nest.map_structure(
-tf.convert_to_tensor,
+tf.convert_to_tensor, initial_chain_state)
+sample_shape = tf.nest.map_structure(
+lambda chain_state: tuple(ps.shape(chain_state)),
+initial_chain_state)
+chain_ndims = tf.nest.map_structure(
+lambda chain_state: self.independent_chain_ndims,
+initial_chain_state)
+dtype = tf.nest.map_structure(
+lambda chain_state: chain_state.dtype,
initial_chain_state)
-sample_shape, chain_ndims, dtype = _prepare_args(
-initial_chain_state, self.independent_chain_ndims
-)
-running_rhat = sample_stats.RunningPotentialScaleReduction(
+rhat = sample_stats.RunningPotentialScaleReduction.from_shape(
shape=sample_shape,
independent_chain_ndims=chain_ndims,
-dtype=dtype
-)
-return PotentialScaleReductionReducerState(
-initial_chain_state, running_rhat.initialize())
+dtype=dtype)
+return PotentialScaleReductionReducerState(rhat)

def one_step(
self,
@@ -156,20 +159,8 @@ def one_step(
new_chain_state = tf.nest.map_structure(
tf.convert_to_tensor,
new_chain_state)
-sample_shape, chain_ndims, dtype = _prepare_args(
-new_chain_state, self.independent_chain_ndims
-)
-running_rhat = sample_stats.RunningPotentialScaleReduction(
-shape=sample_shape,
-independent_chain_ndims=chain_ndims,
-dtype=dtype
-)
-new_rhat_state = running_rhat.update(
-current_reducer_state.rhat_state,
-new_chain_state)
-return PotentialScaleReductionReducerState(
-current_reducer_state.init_state,
-new_rhat_state)
+new_rhat = current_reducer_state.rhat_state.update(new_chain_state)
+return PotentialScaleReductionReducerState(new_rhat)

def finalize(self, final_reducer_state):
"""Finalizes R-hat calculation from the `final_reducer_state`.
Expand All @@ -181,18 +172,10 @@ def finalize(self, final_reducer_state):
Returns:
rhat: an estimate of the R-hat.
"""
-sample_shape, chain_ndims, dtype = _prepare_args(
-final_reducer_state.init_state, self.independent_chain_ndims
-)
with tf.name_scope(
mcmc_util.make_name(
self.name, 'potential_scale_reduction_reducer', 'finalize')):
-running_rhat = sample_stats.RunningPotentialScaleReduction(
-shape=sample_shape,
-independent_chain_ndims=chain_ndims,
-dtype=dtype,
-)
-return running_rhat.finalize(final_reducer_state.rhat_state)
+return final_reducer_state.rhat_state.potential_scale_reduction()

@property
def parameters(self):
@@ -205,20 +188,3 @@ def independent_chain_ndims(self):
@property
def name(self):
return self._parameters['name']


-def _prepare_args(target, chain_ndims):
-"""Infers metadata to instantiate a streaming rhat object from `target`."""
-sample_shape = tf.nest.map_structure(
-lambda chain_state: tuple(ps.shape(chain_state)),
-target
-)
-nested_chain_ndims = tf.nest.map_structure(
-lambda _: chain_ndims,
-target
-)
-dtype = tf.nest.map_structure(
-lambda chain_state: chain_state.dtype,
-target
-)
-return sample_shape, nested_chain_ndims, dtype
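
To make the restructuring concrete, a hedged usage sketch that is not part of the commit: the reducer state now carries only the running R-hat object, which is built once with `from_shape`, threaded through `update`, and read out with `potential_scale_reduction`. The module path below is assumed from the `sample_stats.` references in the diff.

# Usage sketch (assumptions noted above): a streaming R-hat over 4 chains of scalars.
import numpy as np
import tensorflow as tf
from tensorflow_probability.python.experimental.stats import sample_stats

running_rhat = sample_stats.RunningPotentialScaleReduction.from_shape(
    shape=(4,), independent_chain_ndims=1, dtype=tf.float32)
for _ in range(100):
  sample = tf.constant(np.random.randn(4).astype(np.float32))
  running_rhat = running_rhat.update(sample)  # returns a new running state
print(running_rhat.potential_scale_reduction())  # streaming R-hat estimate
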
@@ -142,7 +142,7 @@ def test_in_with_reductions(self):
chain_state, pkr = reduced_kernel.one_step(
chain_state, pkr)
rhat = self.evaluate(
-rhat_reducer.finalize(pkr.streaming_calculations))
+rhat_reducer.finalize(pkr.reduction_results))
self.assertEqual(0.5, rhat)

def test_iid_normal_passes(self):
@@ -146,7 +146,7 @@ def test_with_composed_kernel(self):
for _ in range(2):
current_state, kernel_results = reducer_kernel.one_step(
current_state, kernel_results)
-cov = cov_reducer.finalize(kernel_results.streaming_calculations)
+cov = cov_reducer.finalize(kernel_results.reduction_results)
self.assertAllEqual(16, current_state)
self.assertAllEqual(2, kernel_results.inner_results.call_counter)
self.assertAllEqual(
@@ -157,7 +157,7 @@ def sample_fold(
reducer,
lambda r, s: r.finalize(s),
reducer,
-final_kernel_results.streaming_calculations,
+final_kernel_results.reduction_results,
check_types=False)
if reducer_was_none:
reduction_results = None
@@ -169,7 +169,7 @@ def sample_fold(
return (reduction_results,
end_state,
final_kernel_results.inner_results.inner_results,
-final_kernel_results.streaming_calculations)
+final_kernel_results.reduction_results)
else:
return (reduction_results,
end_state,