From d0e2f95d6924314fd60aa890708e2e7acf67c84a Mon Sep 17 00:00:00 2001 From: Xingyou Song Date: Thu, 23 May 2024 07:17:01 -0700 Subject: [PATCH] Reduce OSS CI errors + fix downstream errors PiperOrigin-RevId: 636541458 --- .github/workflows/core_test.yml | 4 +- iris/policies/keras_cnn_policy.py | 97 ++++++++------- iris/policies/keras_nn_policy.py | 31 +++-- iris/policies/keras_pi_policy.py | 156 +++++++++++++------------ iris/policies/keras_toeplitz_policy.py | 85 ++++++++------ iris/policies/spatial_softmax_test.py | 10 +- iris/worker.py | 1 - requirements.txt | 23 +++- setup.py | 6 +- 9 files changed, 240 insertions(+), 173 deletions(-) diff --git a/.github/workflows/core_test.yml b/.github/workflows/core_test.yml index 3ac1ebc..dbc2296 100644 --- a/.github/workflows/core_test.yml +++ b/.github/workflows/core_test.yml @@ -14,7 +14,7 @@ jobs: runs-on: "${{ matrix.os }}" strategy: matrix: - python-version: [3.9] + python-version: ['3.10'] os: [ubuntu-latest] steps: - uses: actions/checkout@v2 @@ -33,6 +33,6 @@ jobs: - name: Print installed dependencies run: | pip freeze - - name: Test with pytest # (TODO: Automate Iris installation) + - name: Test with pytest # TODO(team): Fix tensorflow version conflict. run: | # pytest -n auto iris diff --git a/iris/policies/keras_cnn_policy.py b/iris/policies/keras_cnn_policy.py index 58f7035..8812ab3 100644 --- a/iris/policies/keras_cnn_policy.py +++ b/iris/policies/keras_cnn_policy.py @@ -13,6 +13,7 @@ # limitations under the License. """Policy class that computes action by running convolutional neural network.""" + from typing import Dict, Optional, Sequence, Union import gym @@ -27,8 +28,9 @@ class KerasCNNPolicy(keras_policy.KerasPolicy): """Policy class, computes action by running convolutional neural network.""" - def __init__(self, ob_space: gym.Space, ac_space: gym.Space, - **kwargs) -> None: + def __init__( + self, ob_space: gym.Space, ac_space: gym.Space, **kwargs + ) -> None: """Initializes a keras CNN policy. See the base class for more details.""" self._rnn_state = None super().__init__(ob_space=ob_space, ac_space=ac_space, **kwargs) @@ -36,13 +38,14 @@ def __init__(self, ob_space: gym.Space, ac_space: gym.Space, def _create_vision_input_layers(self): vision_input_layers = [] for image_label in self._image_input_labels: - image_size = self._ob_space[image_label].shape vision_input_layers.append( tf.keras.layers.Input( - batch_input_shape=(1, image_size[0], image_size[1], - image_size[2]), - dtype="float", - name="vision_input" + image_label)) + shape=self._ob_space[image_label].shape, + batch_size=1, + dtype="float32", + name="vision_input" + image_label, + ) + ) return vision_input_layers def _create_other_input_layer(self): @@ -53,9 +56,11 @@ def _create_other_input_layer(self): self._other_ob_dim = utils.flatdim(self._other_ob_space) if self._other_ob_dim > 0: return tf.keras.layers.Input( - batch_input_shape=(1, self._other_ob_dim), - dtype="float", - name="other_input") + shape=(self._other_ob_dim,), + batch_size=1, + dtype="float32", + name="other_input", + ) return None def _create_vision_processing_layers( @@ -67,7 +72,8 @@ def _create_vision_processing_layers( pool_sizes: Optional[Sequence[int]] = None, pool_strides: Optional[Sequence[int]] = None, final_vision_activation: str = "relu", - use_spatial_softmax: bool = False) -> tf.keras.layers.Layer: + use_spatial_softmax: bool = False, + ) -> tf.keras.layers.Layer: """Create keras layers for CNN image processing. 
Args: @@ -94,17 +100,18 @@ def _create_vision_processing_layers( pool_strides = [None] * len(conv_filter_sizes) for filter_size, kernel_size, pool_size, pool_stride in zip( - conv_filter_sizes, conv_kernel_sizes, pool_sizes, pool_strides): + conv_filter_sizes, conv_kernel_sizes, pool_sizes, pool_strides + ): x = tf.keras.layers.Conv2D( filter_size, kernel_size=kernel_size, padding="valid", - activation=final_vision_activation)( - x) + activation=final_vision_activation, + )(x) if pool_size is not None: - x = tf.keras.layers.MaxPool2D( - pool_size=pool_size, strides=pool_stride)( - x) + x = tf.keras.layers.MaxPool2D(pool_size=pool_size, strides=pool_stride)( + x + ) # Flattening or spatial softmax on image feature map. if use_spatial_softmax: @@ -114,36 +121,41 @@ def _create_vision_processing_layers( # Encoding image into a feature vector. return tf.keras.layers.Dense( - image_feature_length, activation=final_vision_activation)( - x) + image_feature_length, activation=final_vision_activation + )(x) def _create_rnn_layers(self, x, inputs): """By default, creates an LSTM.""" lstm_h_state_input = tf.keras.layers.Input( - batch_input_shape=(1, self._rnn_units), - dtype="float", - name="lstm_h_state_input") + shape=(self._rnn_units,), + batch_size=1, + dtype="float32", + name="lstm_h_state_input", + ) lstm_c_state_input = tf.keras.layers.Input( - batch_input_shape=(1, self._rnn_units), - dtype="float", - name="lstm_c_state_input") + shape=(self._rnn_units,), + batch_size=1, + dtype="float32", + name="lstm_c_state_input", + ) inputs.append(lstm_h_state_input) inputs.append(lstm_c_state_input) - h_state = lstm_h_state_input - c_state = lstm_c_state_input x = tf.keras.layers.Reshape((1, -1))(x) x, h_state, c_state = tf.keras.layers.LSTM( - units=self._rnn_units, return_state=True, stateful=True)( - x, initial_state=[lstm_h_state_input, lstm_c_state_input]) + units=self._rnn_units, return_state=True, stateful=True + )(x, initial_state=[lstm_h_state_input, lstm_c_state_input]) return x, [h_state, c_state] - def _build_model(self, # pytype: disable=signature-mismatch # overriding-parameter-count-checks - fc_layer_sizes: Sequence[int], - use_rnn: bool = False, - rnn_units: int = 32, - image_input_label: Union[Sequence[str], str] = "vision", - final_layer_init: str = "glorot_uniform", - **kwargs) -> None: + # pytype: disable=signature-mismatch # overriding-parameter-count-checks + def _build_model( + self, + fc_layer_sizes: Sequence[int], + use_rnn: bool = False, + rnn_units: int = 32, + image_input_label: Union[Sequence[str], str] = "vision", + final_layer_init: str = "glorot_uniform", + **kwargs + ) -> None: """Constructs a keras CNN to process vision and other sensor data. 
Args: @@ -171,7 +183,8 @@ def _build_model(self, # pytype: disable=signature-mismatch # overriding-param vision_outputs = [] for vision_input in inputs: vision_outputs.append( - self._create_vision_processing_layers(x=vision_input, **kwargs)) + self._create_vision_processing_layers(x=vision_input, **kwargs) + ) vision_output = tf.keras.layers.concatenate(vision_outputs) if self._use_rnn: @@ -190,16 +203,18 @@ def _build_model(self, # pytype: disable=signature-mismatch # overriding-param for fc_layer_size in fc_layer_sizes: x = tf.keras.layers.Dense(fc_layer_size, activation="tanh")(x) action_output = tf.keras.layers.Dense( - self._ac_dim, activation="tanh", kernel_initializer=final_layer_init)( - x) + self._ac_dim, activation="tanh", kernel_initializer=final_layer_init + )(x) outputs.append(action_output) self.model = tf.keras.models.Model(inputs=inputs, outputs=outputs) + # pytype: enable=signature-mismatch # overriding-parameter-count-checks + def reset(self) -> None: """Resets the policy's internal state (default LSTM).""" - lstm_h_state = np.zeros(shape=(1, self._rnn_units), dtype="float") - lstm_c_state = np.zeros(shape=(1, self._rnn_units), dtype="float") + lstm_h_state = np.zeros(shape=(1, self._rnn_units), dtype="float32") + lstm_c_state = np.zeros(shape=(1, self._rnn_units), dtype="float32") self._rnn_state = [lstm_h_state, lstm_c_state] def act( diff --git a/iris/policies/keras_nn_policy.py b/iris/policies/keras_nn_policy.py index 0e67266..c821f76 100644 --- a/iris/policies/keras_nn_policy.py +++ b/iris/policies/keras_nn_policy.py @@ -22,11 +22,14 @@ class KerasNNPolicy(keras_policy.KerasPolicy): """Policy class that computes action by running feed fwd neural network.""" - def _build_model(self, # pytype: disable=signature-mismatch # overriding-parameter-count-checks - hidden_layer_sizes: Sequence[int], - activation: str = "tanh", - use_bias: bool = False, - kernel_initializer: str = "zeros") -> None: + # pytype: disable=signature-mismatch # overriding-parameter-count-checks + def _build_model( + self, + hidden_layer_sizes: Sequence[int], + activation: str = "tanh", + use_bias: bool = False, + kernel_initializer: str = "zeros", + ) -> None: """Constructs a keras feed forward neural network model. Args: @@ -37,20 +40,24 @@ def _build_model(self, # pytype: disable=signature-mismatch # overriding-param """ # Creates model. input_layer = tf.keras.layers.Input( - batch_input_shape=(1, self._ob_dim), dtype="float", name="input") + shape=(self._ob_dim,), batch_size=1, dtype="float32", name="input" + ) x = input_layer for layer_size in hidden_layer_sizes: x = tf.keras.layers.Dense( layer_size, activation=activation, use_bias=use_bias, - kernel_initializer=kernel_initializer)( - x) + kernel_initializer=kernel_initializer, + )(x) output_layer = tf.keras.layers.Dense( self._ac_dim, activation=activation, use_bias=use_bias, - kernel_initializer=kernel_initializer)( - x) - self.model = tf.keras.models.Model(inputs=[input_layer], - outputs=[output_layer]) + kernel_initializer=kernel_initializer, + )(x) + self.model = tf.keras.models.Model( + inputs=[input_layer], outputs=[output_layer] + ) + + # pytype: enable=signature-mismatch # overriding-parameter-count-checks diff --git a/iris/policies/keras_pi_policy.py b/iris/policies/keras_pi_policy.py index 737b812..ae2fe66 100644 --- a/iris/policies/keras_pi_policy.py +++ b/iris/policies/keras_pi_policy.py @@ -13,6 +13,7 @@ # limitations under the License. 
"""Policy network on learned predictive information representations.""" + from typing import Dict, List, Optional, Sequence, Union import gym @@ -27,8 +28,9 @@ class KerasPIPolicy(keras_policy.KerasPolicy): """Policy network on learned predictive information representations.""" - def __init__(self, ob_space: gym.Space, ac_space: gym.Space, - **kwargs) -> None: + def __init__( + self, ob_space: gym.Space, ac_space: gym.Space, **kwargs + ) -> None: """Initializes a keras CNN policy. See the base class for more details.""" super().__init__(ob_space=ob_space, ac_space=ac_space, **kwargs) # Build the encoder h that outputs [hidden_state, hidden_state_vision_only] @@ -52,11 +54,14 @@ def get_representation_layers(self) -> List[tf.keras.layers.Layer]: def _create_vision_input_layers(self): vision_input_layers = [] for image_label in self._image_input_labels: - image_size = self._ob_space[image_label].shape - vision_input_layers.append(tf.keras.layers.Input( - batch_input_shape=(1, image_size[0], image_size[1], image_size[2]), - dtype="float", - name="vision_input" + image_label)) + vision_input_layers.append( + tf.keras.layers.Input( + shape=self._ob_space[image_label].shape, + batch_size=1, + dtype="float32", + name="vision_input" + image_label, + ) + ) return vision_input_layers def _create_other_input_layer(self): @@ -70,9 +75,11 @@ def _create_other_input_layer(self): self._other_ob_dim = utils.flatdim(self._other_ob_space) if self._other_ob_dim > 0: return tf.keras.layers.Input( - batch_input_shape=(1, self._other_ob_dim), - dtype="float", - name="other_input") + shape=(self._other_ob_dim,), + batch_size=1, + dtype="float32", + name="other_input", + ) return None def _create_vision_processing_layers( @@ -85,7 +92,8 @@ def _create_vision_processing_layers( pool_strides: Optional[Sequence[int]] = None, final_vision_activation: str = "relu", use_spatial_softmax: bool = False, - **kwargs) -> tf.keras.layers.Layer: + **kwargs + ) -> tf.keras.layers.Layer: """Create keras layers for CNN image processing. Args: @@ -113,17 +121,18 @@ def _create_vision_processing_layers( pool_strides = [None] * len(conv_filter_sizes) for filter_size, kernel_size, pool_size, pool_stride in zip( - conv_filter_sizes, conv_kernel_sizes, pool_sizes, pool_strides): + conv_filter_sizes, conv_kernel_sizes, pool_sizes, pool_strides + ): x = tf.keras.layers.Conv2D( filter_size, kernel_size=kernel_size, padding="valid", - activation=final_vision_activation)( - x) + activation=final_vision_activation, + )(x) if pool_size is not None: - x = tf.keras.layers.MaxPool2D( - pool_size=pool_size, strides=pool_stride)( - x) + x = tf.keras.layers.MaxPool2D(pool_size=pool_size, strides=pool_stride)( + x + ) # Flattening or spatial softmax on image feature map. if use_spatial_softmax: @@ -132,32 +141,36 @@ def _create_vision_processing_layers( x = tf.keras.layers.Flatten()(x) # Encoding image into a feature vector. 
- return tf.keras.layers.Dense(image_feature_length, - activation=final_vision_activation)(x) - - def _build_model(self, # pytype: disable=signature-mismatch # overriding-parameter-count-checks - state_dim: int, - fc_layer_sizes: Sequence[int], - **kwargs) -> None: + return tf.keras.layers.Dense( + image_feature_length, activation=final_vision_activation + )(x) + + # pytype: disable=signature-mismatch # overriding-parameter-count-checks + def _build_model( + self, state_dim: int, fc_layer_sizes: Sequence[int], **kwargs + ) -> None: # hidden state input state_input = tf.keras.layers.Input( - batch_input_shape=(1, state_dim), - dtype="float", - name="s_input") + shape=(state_dim,), batch_size=1, dtype="float32", name="s_input" + ) # policy x = state_input for fc_layer_size in fc_layer_sizes: x = tf.keras.layers.Dense(fc_layer_size, activation="tanh")(x) action_output = tf.keras.layers.Dense(self._ac_dim, activation="tanh")(x) - self.model = tf.keras.models.Model(inputs=state_input, - outputs=[action_output]) + self.model = tf.keras.models.Model( + inputs=state_input, outputs=[action_output] + ) + + # pytype: enable=signature-mismatch # overriding-parameter-count-checks def build_h( self, h_fc_layer_sizes: Sequence[int], image_input_label: Optional[Union[Sequence[str], str]] = None, - **kwargs): + **kwargs + ): # image_input_label: Label of image input in observation dictionary. if image_input_label is None: self._image_input_labels = [] @@ -171,9 +184,11 @@ def build_h( vision_outputs = [] for vision_input in inputs: vision_outputs.append( - self._create_vision_processing_layers(x=vision_input, **kwargs)) - vision_output = tf.keras.layers.concatenate( - vision_outputs) if vision_outputs else None + self._create_vision_processing_layers(x=vision_input, **kwargs) + ) + vision_output = ( + tf.keras.layers.concatenate(vision_outputs) if vision_outputs else None + ) # Add other sensor observations. 
other_input = self._create_other_input_layer() @@ -193,15 +208,16 @@ def build_h( outputs = [x, vision_output] if vision_output is not None else x self.h_model = tf.keras.models.Model(inputs=inputs, outputs=outputs) - def build_f(self, - state_dim: int, - f_fc_layer_sizes: Sequence[int], - num_supports: int = 51, - **kwargs): + def build_f( + self, + state_dim: int, + f_fc_layer_sizes: Sequence[int], + num_supports: int = 51, + **kwargs + ): state_input = tf.keras.layers.Input( - batch_input_shape=(1, state_dim), - dtype="float", - name="s_input") + shape=(state_dim,), batch_size=1, dtype="float32", name="s_input" + ) x = state_input for f_fc_layer_size in f_fc_layer_sizes: x = tf.keras.layers.Dense(f_fc_layer_size, activation="tanh")(x) @@ -209,57 +225,52 @@ def build_f(self, v = tf.keras.layers.Dense(num_supports)(x) self.f_model = tf.keras.models.Model(inputs=state_input, outputs=[p, v]) - def build_g(self, - state_dim: int, - g_fc_layer_sizes: Sequence[int], - **kwargs): + def build_g(self, state_dim: int, g_fc_layer_sizes: Sequence[int], **kwargs): state_input = tf.keras.layers.Input( - batch_input_shape=(1, state_dim), - dtype="float", - name="s_input") + shape=(state_dim,), batch_size=1, dtype="float32", name="s_input" + ) action_input = tf.keras.layers.Input( - batch_input_shape=(1, self._ac_dim), - dtype="float", - name="action_input") + shape=(self._ac_dim,), + batch_size=1, + dtype="float32", + name="action_input", + ) x = tf.keras.layers.concatenate([state_input, action_input]) for g_fc_layer_size in g_fc_layer_sizes: x = tf.keras.layers.Dense(g_fc_layer_size, activation="tanh")(x) u_next = tf.keras.layers.Dense(1)(x) s_next = tf.keras.layers.Dense(state_dim, activation="tanh")(x) - self.g_model = tf.keras.models.Model(inputs=[state_input, action_input], - outputs=[u_next, s_next]) + self.g_model = tf.keras.models.Model( + inputs=[state_input, action_input], outputs=[u_next, s_next] + ) - def build_px(self, - state_dim: int, - **kwargs): + def build_px(self, state_dim: int, **kwargs): state_input = tf.keras.layers.Input( - batch_input_shape=(1, state_dim), - dtype="float", - name="s_input") + shape=(state_dim,), batch_size=1, dtype="float32", name="s_input" + ) x = state_input x = tf.keras.layers.Dense(64, activation="tanh")(x) z = tf.keras.layers.Dense(state_dim)(x) self.px_model = tf.keras.models.Model(inputs=state_input, outputs=z) - def build_py(self, - state_dim: int, - image_feature_length: int, - **kwargs): + def build_py(self, state_dim: int, image_feature_length: int, **kwargs): state_input = tf.keras.layers.Input( - batch_input_shape=( - 1, image_feature_length * len(self._image_input_labels)), - dtype="float", - name="s_input") + shape=(image_feature_length * len(self._image_input_labels),), + batch_size=1, + dtype="float32", + name="s_input", + ) x = state_input x = tf.keras.layers.Dense(64, activation="tanh")(x) z = tf.keras.layers.Dense(state_dim)(x) self.py_model = tf.keras.models.Model(inputs=state_input, outputs=z) - def act(self, ob: Union[np.ndarray, Dict[str, np.ndarray]] - ) -> Union[np.ndarray, Dict[str, np.ndarray]]: + def act( + self, ob: Union[np.ndarray, Dict[str, np.ndarray]] + ) -> Union[np.ndarray, Dict[str, np.ndarray]]: """Maps the observation to action. 
Args: @@ -294,8 +305,9 @@ def act(self, ob: Union[np.ndarray, Dict[str, np.ndarray]] actions = utils.unflatten(self._ac_space, actions) return actions - def rollout(self, ob: Union[np.ndarray, Dict[str, np.ndarray]], - rollout_length: int) -> np.ndarray: + def rollout( + self, ob: Union[np.ndarray, Dict[str, np.ndarray]], rollout_length: int + ) -> np.ndarray: # Separate vision input and other observations. inputs = [] for image_label in self._image_input_labels: @@ -315,7 +327,7 @@ def rollout(self, ob: Union[np.ndarray, Dict[str, np.ndarray]], # Run model. s, _ = self.h_model(inputs) - reward = 0. + reward = 0.0 for _ in range(rollout_length): action = self.model(s) u_next, s = self.g_model([s, action]) diff --git a/iris/policies/keras_toeplitz_policy.py b/iris/policies/keras_toeplitz_policy.py index 493119c..5efa917 100644 --- a/iris/policies/keras_toeplitz_policy.py +++ b/iris/policies/keras_toeplitz_policy.py @@ -64,11 +64,13 @@ class Toeplitz(tf.keras.layers.Layer): before multiplication. """ - def __init__(self, - units: int = 32, - activation: str = "tanh", - use_bias: bool = True, - kernel_initializer: str = "random_normal") -> None: + def __init__( + self, + units: int = 32, + activation: str = "tanh", + use_bias: bool = True, + kernel_initializer: str = "random_normal", + ) -> None: super().__init__() self._units = units self._activation = tf.keras.activations.get(activation) @@ -77,21 +79,23 @@ def __init__(self, def build(self, input_shape: Sequence[int]) -> None: self._cross_weight = self.add_weight( - shape=(1,), - initializer=self._kernel_initializer, - trainable=True) + shape=(1,), initializer=self._kernel_initializer, trainable=True + ) self._col = self.add_weight( shape=(input_shape[-1] - 1,), initializer=self._kernel_initializer, - trainable=True) + trainable=True, + ) self._row = self.add_weight( shape=(self._units - 1,), initializer=self._kernel_initializer, - trainable=True) + trainable=True, + ) if self._use_bias: self._b = self.add_weight( - shape=(self._units,), initializer="random_normal", trainable=True) + shape=(self._units,), initializer="random_normal", trainable=True + ) else: self._b = 0 @@ -99,16 +103,22 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor: input_shape = inputs.shape toeplitz_row_size = max(input_shape[-1], self._units) extended_col = tf.concat( - [self._cross_weight, - self._col, - tf.zeros(toeplitz_row_size - input_shape[-1])], 0) + [ + self._cross_weight, + self._col, + tf.zeros(toeplitz_row_size - input_shape[-1]), + ], + 0, + ) extended_row = tf.concat( - [self._cross_weight, - self._row, - tf.zeros(toeplitz_row_size - self._units)], 0) - weight_matrix = tf.linalg.LinearOperatorToeplitz( - extended_col, - extended_row) + [ + self._cross_weight, + self._row, + tf.zeros(toeplitz_row_size - self._units), + ], + 0, + ) + weight_matrix = tf.linalg.LinearOperatorToeplitz(extended_col, extended_row) # TODO: Move Toeplitz weight matrix creation without zero # padding to the build function when TF supports rectangular Toeplitz # matrices @@ -118,20 +128,21 @@ def call(self, inputs: tf.Tensor) -> tf.Tensor: extended_outputs = tf.matmul(extended_inputs, weight_matrix) output_shape = input_shape.as_list() output_shape[-1] = self._units - outputs = tf.slice(extended_outputs, - [0] * len(output_shape), - output_shape) + outputs = tf.slice(extended_outputs, [0] * len(output_shape), output_shape) return self._activation(outputs + self._b) class KerasToeplitzPolicy(keras_policy.KerasPolicy): """Policy class that computes action by running toeplitz 
network.""" - def _build_model(self, # pytype: disable=signature-mismatch # overriding-parameter-count-checks - hidden_layer_sizes: Sequence[int], - activation: str = "tanh", - use_bias: bool = False, - kernel_initializer: str = "zeros") -> None: + # pytype: disable=signature-mismatch # overriding-parameter-count-checks + def _build_model( + self, + hidden_layer_sizes: Sequence[int], + activation: str = "tanh", + use_bias: bool = False, + kernel_initializer: str = "zeros", + ) -> None: """Constructs a keras feed forward neural network model. Args: @@ -142,20 +153,24 @@ def _build_model(self, # pytype: disable=signature-mismatch # overriding-param """ # Creates model. input_layer = tf.keras.layers.Input( - batch_input_shape=(1, self._ob_dim), dtype="float", name="input") + shape=(self._ob_dim,), batch_size=1, dtype="float32", name="input" + ) x = input_layer for layer_size in hidden_layer_sizes: x = Toeplitz( layer_size, activation=activation, use_bias=use_bias, - kernel_initializer=kernel_initializer)( - x) + kernel_initializer=kernel_initializer, + )(x) output_layer = Toeplitz( self._ac_dim, activation=activation, use_bias=use_bias, - kernel_initializer=kernel_initializer)( - x) - self.model = tf.keras.models.Model(inputs=[input_layer], - outputs=[output_layer]) + kernel_initializer=kernel_initializer, + )(x) + self.model = tf.keras.models.Model( + inputs=[input_layer], outputs=[output_layer] + ) + + # pytype: enable=signature-mismatch # overriding-parameter-count-checks diff --git a/iris/policies/spatial_softmax_test.py b/iris/policies/spatial_softmax_test.py index 817009d..adb0dae 100644 --- a/iris/policies/spatial_softmax_test.py +++ b/iris/policies/spatial_softmax_test.py @@ -12,17 +12,23 @@ # See the License for the specific language governing permissions and # limitations under the License. +# pytype: disable=attribute-error from iris.policies import spatial_softmax import tensorflow as tf2 import tensorflow.compat.v1 as tf +from absl.testing import absltest - -test_utils = tf2._keras_internal.testing_infra.test_utils # pylint:disable=protected-access +# TODO: Remove this try/except once import is fixed. +try: + test_utils = tf2._keras_internal.testing_infra.test_utils # pylint:disable=protected-access +except AttributeError: + test_utils = None _INPUT_SHAPE = (16, 32, 32, 128) _TEMPERATURE = 2.5 +@absltest.skipIf(test_utils is None, 'test_utils not available') class SpatialSoftmaxTest(tf.test.TestCase): def test_with_default(self): diff --git a/iris/worker.py b/iris/worker.py index 8d2ae11..ae34881 100644 --- a/iris/worker.py +++ b/iris/worker.py @@ -31,7 +31,6 @@ from iris.policies import nas_policy import numpy as np import pyglove as pg -import qj_global # pylint: disable=unused-import import reverb from tf_agents.environments import gym_wrapper from tf_agents.google.utils import mp4_video_recorder diff --git a/requirements.txt b/requirements.txt index 9dd6f2b..38f2bce 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,14 +3,25 @@ typing # Version dependent on Python version. pytest # Use the latest version to match github workflow. absl-py>=1.0.0 -# Distributed systems libraries. -dm-launchpad - -# Configuration + Experimentation -ml-collections - # Numerical packages. numpy>=1.21.5 jax # Use latest version. jaxlib # Use latest version. flax # Use latest version. +tensorflow # TODO(team): Resolve version conflicts. + +# Distributed systems libraries. +# NOTE: Requires tensorflow~=2.8.0 to avoid proto issues. 
+dm-launchpad[tensorflow] +dm-reverb[tensorflow] + +# Configuration + Experimentation +ml-collections>=0.1.1 +gin-config>=0.5.0 + +# Reinforcement Learning +gym +tf-agents # NOTE: Requires tensorflow>=2.15.0 for TFP compatibility. + +# Optimization packages +pyglove diff --git a/setup.py b/setup.py index 1343159..ac1b3ec 100644 --- a/setup.py +++ b/setup.py @@ -13,6 +13,7 @@ # limitations under the License. """Setup for pip package.""" + import setuptools @@ -34,10 +35,11 @@ def _parse_requirements(requirements_txt_path: str) -> list[str]: setuptools.setup( name='google-iris', - version='1.0', + version='0.0.1.alpha', description='Iris', author='Iris Team', author_email='jaindeepali@google.com', install_requires=_parse_requirements('requirements.txt'), packages=setuptools.find_packages(), - ) + python_requires='>=3.10', +)
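
For context, a minimal standalone sketch of the tf.keras Input construction that the policy files above converge on: `shape` plus `batch_size=1` in place of the older `batch_input_shape=(1, ...)` form, with the dtype spelled out as "float32". The observation/action dimensions and the hidden layer size below are illustrative placeholders, not values taken from any Iris config.

    import numpy as np
    import tensorflow as tf

    ob_dim, ac_dim = 8, 2  # illustrative observation/action dimensions

    # Fixed batch of one, mirroring how the policies above declare their inputs.
    input_layer = tf.keras.layers.Input(
        shape=(ob_dim,), batch_size=1, dtype="float32", name="input"
    )
    x = tf.keras.layers.Dense(32, activation="tanh")(input_layer)
    output_layer = tf.keras.layers.Dense(ac_dim, activation="tanh")(x)
    model = tf.keras.models.Model(inputs=[input_layer], outputs=[output_layer])

    # One flattened observation in, one action out, as in KerasNNPolicy.act.
    ob = np.zeros((1, ob_dim), dtype="float32")
    action = model(ob)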