I wonder if it is possible to use Distrax bijectors with Flax instead of Haiku. I attempted to port the Flow example to Flax, but got a weird error:

```python
"""A simple example of a flow model trained on MNIST."""
from typing import Sequence, List, Callable, Any

import numpy as np
import jax
import jax.numpy as jnp
import flax.linen as nn
import distrax


def make_conditioner(
    event_shape: Sequence[int], hidden_sizes: Sequence[int], num_bijector_params: int
) -> nn.Module:
    """Creates an MLP conditioner for each layer of the flow."""
    layers: List[Callable[..., Any]] = [
        lambda x: x.reshape((-1, *x.shape[-len(event_shape) :]))
    ]
    for hidden_size in hidden_sizes:
        layers.append(nn.Dense(hidden_size))
        layers.append(nn.relu)
    # We initialize this linear layer to zero so that the flow is initialized
    # to the identity function.
    layers.append(
        nn.Dense(
            np.prod(event_shape) * num_bijector_params,
            kernel_init=jax.nn.initializers.zeros,
            bias_init=jax.nn.initializers.zeros,
        )
    )
    layers.append(
        lambda x: x.reshape(
            (
                *x.shape[:-3],
                *event_shape,
                num_bijector_params,
            )
        )
    )
    return nn.Sequential(layers)


class Flow(nn.Module):
    latent_dim: int
    hidden_dims: Sequence[int]
    num_coupling_layers: int
    num_bins: int

    def setup(self):
        """Creates the flow model."""
        # Alternating binary mask.
        event_shape = (self.latent_dim,)
        mask = jnp.arange(0, np.prod(event_shape)) % 2
        mask = jnp.reshape(mask, event_shape)
        mask = mask.astype(bool)

        def bijector_fn(params: jnp.ndarray):
            return distrax.RationalQuadraticSpline(params, range_min=0.0, range_max=1.0)

        # Number of parameters for the rational-quadratic spline:
        # - `num_bins` bin widths
        # - `num_bins` bin heights
        # - `num_bins + 1` knot slopes
        # for a total of `3 * num_bins + 1` parameters.
        num_bijector_params = 3 * self.num_bins + 1

        layers = []
        for _ in range(self.num_coupling_layers):
            layer = distrax.MaskedCoupling(
                mask=mask,
                bijector=bijector_fn,
                conditioner=make_conditioner(
                    event_shape, self.hidden_dims, num_bijector_params
                ),
            )
            layers.append(layer)
            # Flip the mask after each layer.
            mask = jnp.logical_not(mask)

        self.flow: distrax.Bijector = distrax.Chain(layers)

    def __call__(self, X):
        if len(X.shape) == 0:
            X = X[jnp.newaxis, ...]
        return self.flow.forward(X)


if __name__ == "__main__":
    latent_dim = 20
    hidden_dims = (32, 64, 128, 256, 512)
    flow_num_coupling_layers = 8
    flow_num_bins = 4

    key = jax.random.PRNGKey(42)
    vae = Flow(
        latent_dim,
        hidden_dims,
        flow_num_coupling_layers,
        flow_num_bins,
    )
    Z_dummy = jnp.empty((1, latent_dim))
    variables = vae.init(key, Z_dummy)
```
Hey @nalzok, the problem with this is that `self.flow` is not a Flax Module, so Flax can't set the scope for the inner Modules that live in `MaskedCoupling.conditioner`. You can get around this by defining everything inside a `compact` method. The following code runs:

```python
from typing import Sequence, List, Callable, Any
import numpy as np
import jax
import jax.numpy as jnp
import flax.linen as nn
import distrax


def make_conditioner(
    event_shape: Sequence[int], hidden_sizes: Sequence[int], num_bijector_params: int
) -> nn.Module:
    """Creates an MLP conditioner for each layer of the flow."""
    layers: List[Callable[..., Any]] = [
        lambda x: x.reshape((-1, *x.shape[-len(event_shape) :]))
    ]
    for hidden_size in hidden_sizes:
        layers.append(nn.Dense(hidden_size))
        layers.append(nn.relu)
    # We initialize this linear layer to zero so that the flow is initialized
    # to the identity function.
    layers.append(
        nn.Dense(
            np.prod(event_shape) * num_bijector_params,
            kernel_init=jax.nn.initializers.zeros,
            bias_init=jax.nn.initializers.zeros,
        )
    )
    layers.append(
        lambda x: x.reshape(
            (
                *x.shape[:-3],
                *event_shape,
                num_bijector_params,
            )
        )
    )
    return nn.Sequential(layers)


class Flow(nn.Module):
    latent_dim: int
    hidden_dims: Sequence[int]
    num_coupling_layers: int
    num_bins: int

    @nn.compact
    def __call__(self, X):
        """Creates the flow model."""
        # Alternating binary mask.
        event_shape = (self.latent_dim,)
        mask = jnp.arange(0, np.prod(event_shape)) % 2
        mask = jnp.reshape(mask, event_shape)
        mask = mask.astype(bool)

        def bijector_fn(params: jnp.ndarray):
            return distrax.RationalQuadraticSpline(params, range_min=0.0, range_max=1.0)

        # Number of parameters for the rational-quadratic spline:
        # - `num_bins` bin widths
        # - `num_bins` bin heights
        # - `num_bins + 1` knot slopes
        # for a total of `3 * num_bins + 1` parameters.
        num_bijector_params = 3 * self.num_bins + 1

        layers = []
        for _ in range(self.num_coupling_layers):
            layer = distrax.MaskedCoupling(
                mask=mask,
                bijector=bijector_fn,
                conditioner=make_conditioner(
                    event_shape, self.hidden_dims, num_bijector_params
                ),
            )
            layers.append(layer)
            # Flip the mask after each layer.
            mask = jnp.logical_not(mask)

        flow: distrax.Bijector = distrax.Chain(layers)

        if len(X.shape) == 0:
            X = X[jnp.newaxis, ...]
        return flow.forward(X)


if __name__ == "__main__":
    latent_dim = 20
    hidden_dims = (32, 64, 128, 256, 512)
    flow_num_coupling_layers = 8
    flow_num_bins = 4

    key = jax.random.PRNGKey(42)
    vae = Flow(
        latent_dim,
        hidden_dims,
        flow_num_coupling_layers,
        flow_num_bins,
    )
    Z_dummy = jnp.empty((1, latent_dim))
    variables = vae.init(key, Z_dummy)
```
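For what it's worth, here is a minimal usage sketch of my own (not from the original reply) showing how the initialized parameters would be used: with the `@nn.compact` version, a plain `apply` call runs the forward pass of the chained bijectors. It reuses `vae`, `variables`, and `latent_dim` from the `__main__` block above and keeps the batch size at 1, matching `Z_dummy`.

```python
# Hypothetical follow-up to the snippet above: push a batch through the flow
# with the parameters returned by `vae.init`.
Z = jax.random.uniform(jax.random.PRNGKey(0), (1, latent_dim))  # batch of 1, like Z_dummy
Y = vae.apply(variables, Z)  # calls Flow.__call__, i.e. flow.forward(Z)
print(Y.shape)  # (1, 20)
```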
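If you eventually want a trainable density (the original Haiku MNIST example returns a `log_prob` rather than a forward pass), the same `@nn.compact` trick applies. Below is a rough sketch of my own, not from the thread: it reuses `make_conditioner` from the snippet above and the standard distrax pattern of a uniform base distribution pushed through the inverted chain of couplings. The class name `FlowDensity` is made up.

```python
class FlowDensity(nn.Module):
    """Hypothetical variant of Flow that returns log p(x) instead of forward(x)."""

    latent_dim: int
    hidden_dims: Sequence[int]
    num_coupling_layers: int
    num_bins: int

    @nn.compact
    def __call__(self, X):
        event_shape = (self.latent_dim,)
        mask = (jnp.arange(self.latent_dim) % 2).astype(bool)
        num_bijector_params = 3 * self.num_bins + 1

        def bijector_fn(params):
            return distrax.RationalQuadraticSpline(params, range_min=0.0, range_max=1.0)

        layers = []
        for _ in range(self.num_coupling_layers):
            layers.append(
                distrax.MaskedCoupling(
                    mask=mask,
                    bijector=bijector_fn,
                    conditioner=make_conditioner(
                        event_shape, self.hidden_dims, num_bijector_params
                    ),
                )
            )
            # Flip the mask after each layer, as above.
            mask = jnp.logical_not(mask)

        # Uniform base distribution on [0, 1]^d, transformed by the inverted
        # chain of couplings, following the original distrax example.
        base = distrax.Independent(
            distrax.Uniform(low=jnp.zeros(event_shape), high=jnp.ones(event_shape)),
            reinterpreted_batch_ndims=len(event_shape),
        )
        model = distrax.Transformed(base, distrax.Inverse(distrax.Chain(layers)))
        return model.log_prob(X)
```

Initialization and use follow the same `init`/`apply` pattern as before, e.g. `log_px = FlowDensity(20, (32,), 8, 4).apply(params, batch)`.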