diff --git a/.bazelversion b/.bazelversion
index 4be2c727..f22d756d 100644
--- a/.bazelversion
+++ b/.bazelversion
@@ -1 +1 @@
-6.5.0
\ No newline at end of file
+6.5.0
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 35efe54c..69b851c4 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -22,7 +22,7 @@ on:
     branches:
       - master
   release:
-    types: [published]      
+    types: [published]
 
 jobs:
   build:
diff --git a/.github/workflows/ci-lint.yml b/.github/workflows/ci-lint.yml
new file mode 100644
index 00000000..dede434d
--- /dev/null
+++ b/.github/workflows/ci-lint.yml
@@ -0,0 +1,21 @@
+name: pre-commit
+
+on:
+  pull_request:
+  push:
+     branches: [master]
+
+jobs:
+  pre-commit:
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4.1.7
+      with:
+        # Ensure the full history is fetched
+        # This is required to run pre-commit on a specific set of commits
+        # TODO: Remove this when all the pre-commit issues are fixed
+        fetch-depth: 0
+    - uses: actions/setup-python@v5.1.1
+      with:
+        python-version: 3.13
+    - uses: pre-commit/action@v3.0.1
diff --git a/.gitignore b/.gitignore
index fdf94603..3ecf3ba3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -126,4 +126,4 @@ dmypy.json
 .pyre/
 
 # pb2.py files
-*_pb2.py
\ No newline at end of file
+*_pb2.py
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 00000000..d74e3dbe
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,39 @@
+# pre-commit is a tool to perform a predefined set of tasks manually and/or
+# automatically before git commits are made.
+#
+# Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level
+#
+# Common tasks
+#
+# - Register git hooks: pre-commit install --install-hooks
+# - Run on all files:   pre-commit run --all-files
+#
+# These pre-commit hooks are run as CI.
+#
+# NOTE: if it can be avoided, add configs/args in pyproject.toml or below instead of creating a new `.config.file`.
+# https://pre-commit.ci/#configuration
+ci:
+  autoupdate_schedule: monthly
+  autofix_commit_msg: |
+    [pre-commit.ci] Apply automatic pre-commit fixes
+
+repos:
+  # general
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.6.0
+    hooks:
+      - id: end-of-file-fixer
+        exclude: '\.svg$|\.patch$'
+      - id: trailing-whitespace
+        exclude: '\.svg$|\.patch$'
+      - id: check-json
+      - id: check-yaml
+        args: [--allow-multiple-documents, --unsafe]
+      - id: check-toml
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.5.6
+    hooks:
+      - id: ruff
+        args: ["--fix"]
+      - id: ruff-format
diff --git a/LICENSE b/LICENSE
index c1d8805b..f0e600d3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -226,4 +226,4 @@ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/pyproject.toml b/pyproject.toml
index 6a345a2e..27839ccc 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,3 +21,112 @@ requires = [
   # (b/206845101)
   "numpy~=1.22.0",
 ]
+
+[tool.ruff]
+line-length = 88
+
+[tool.ruff.lint]
+select = [
+    # pycodestyle
+    "E",
+    "W",
+    # Pyflakes
+    "F",
+    # pyupgrade
+    "UP",
+    # flake8-bugbear
+    "B",
+    # flake8-simplify
+    "SIM",
+    # isort
+    "I",
+    # pep8 naming
+    "N",
+    # pydocstyle
+    "D",
+    # annotations
+    "ANN",
+    # debugger
+    "T10",
+    # flake8-pytest
+    "PT",
+    # flake8-return
+    "RET",
+    # flake8-unused-arguments
+    "ARG",
+    # flake8-fixme
+    "FIX",
+    # flake8-eradicate
+    "ERA",
+    # pandas-vet
+    "PD",
+    # numpy-specific rules
+    "NPY",
+]
+
+ignore = [
+    "D104",   # Missing docstring in public package
+    "D100",   # Missing docstring in public module
+    "D211",   # No blank line before class
+    "PD901",  # Avoid using 'df' for pandas dataframes. Perfectly fine in functions with limited scope
+    "ANN201", # Missing return type annotation for public function (makes no sense for NoneType return types...)
+    "ANN101", # Missing type annotation for `self`
+    "ANN204", # Missing return type annotation for special method
+    "ANN002", # Missing type annotation for `*args`
+    "ANN003", # Missing type annotation for `**kwargs`
+    "D105",   # Missing docstring in magic method
+    "D203",   # 1 blank line before after class docstring
+    "D204",   # 1 blank line required after class docstring
+    "D413",   # 1 blank line after parameters
+    "SIM108", # Simplify if/else to one line; not always clearer
+    "D206",   # Docstrings should be indented with spaces; unnecessary when running ruff-format
+    "E501",   # Line length too long; unnecessary when running ruff-format
+    "W191",   # Indentation contains tabs; unnecessary when running ruff-format
+
+    # FIX AND REMOVE BELOW CODES:
+    "ANN001", # Missing type annotation for function argument
+    "ANN102", # Missing type annotation for `cls` in classmethod
+    "ANN202", # Missing return type annotation for private function
+    "ANN205", # Missing return type annotation for staticmethod
+    "ANN206", # Missing return type annotation for classmethod `setUpClass`
+    "ANN401", # Dynamically typed expressions (typing.Any) are disallowed
+    "ARG001", # Unused function argument
+    "ARG002", # Unused method argument
+    "ARG005", # Unused lambda argument
+    "B007",   # Loop control variable `...` not used within loop body
+    "B008",   # Do not perform function call in argument defaults
+    "B904",   # Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None`
+    "D101",   # Missing docstring in public class
+    "D102",   # Missing docstring in public method
+    "D103",   # Missing docstring in public function
+    "D107",   # Missing docstring in `__init__`
+    "D401",   # First line of docstring should be in imperative mood
+    "D404",   # First word of the docstring should not be "This"
+    "D417",   # Missing argument description in the docstring
+    "E731",   # Do not assign a `lambda` expression, use a `def`
+    "E741",   # Ambiguous variable name
+    "ERA001", # Found commented-out code
+    "F401",   # `...` imported but unused
+    "F403",   # `from ... import *` used; unable to detect undefined names
+    "FIX002", # Line contains TODO, consider resolving the issue
+    "FIX004", # Line contains HACK, consider resolving the issue
+    "N802",   # Function name should be lowercase
+    "NPY002", # Replace legacy `np.random.rand` call with `np.random.Generator`
+    "PD011",  # Use `.to_numpy()` instead of `.values`
+    "PT009",  # Use a regular `assert` instead of unittest-style asserts
+    "PT018",  # Assertion should be broken down into multiple parts
+    "PT027",  # Use `pytest.raises` instead of unittest-style `assertRaisesRegex`
+    "RET504", # Unnecessary assignment to `...` before `return` statement
+    "RET505", # Unnecessary `elif` or `else` after `return` statement
+    "SIM103", # Return the negated condition directly
+    "SIM105", # Use `contextlib.suppress(...)` instead of `try`-`except`-`pass`
+    "SIM117", # Use a single `with` statement with multiple contexts instead of nested `with` statements
+    "SIM118", # Use `key in dict` instead of `key in dict.keys()`
+    "SIM212", # Use `... if ... else ...` instead of `... if not ... else ...`
+    "UP008",  # Use `super()` instead of `super(__class__, self)`
+    "UP028",  # Replace `yield` over `for` loop with `yield from`
+    "UP031",  # Use format specifiers instead of percent format
+]
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["F401"]
diff --git a/setup.py b/setup.py
index c8427cf7..4992b714 100644
--- a/setup.py
+++ b/setup.py
@@ -18,199 +18,203 @@
 import shutil
 import subprocess
 import sys
-
+from distutils.command import build
 
 # pylint:disable=g-bad-import-order
 # setuptools must be imported prior to distutils.
 import setuptools
-from distutils.command import build
-# pylint:enable=g-bad-import-order
 
-from setuptools import find_packages
-from setuptools import setup
+# pylint:enable=g-bad-import-order
+from setuptools import find_packages, setup
 from setuptools.command.install import install
 from setuptools.dist import Distribution
 
 
 class _BuildCommand(build.build):
-  """Build everything that is needed to install.
+    """Build everything that is needed to install.
+
+    This overrides the original distutils "build" command to to run gen_proto
+    command before any sub_commands.
 
-  This overrides the original distutils "build" command to to run gen_proto
-  command before any sub_commands.
+    build command is also invoked from bdist_wheel and install command, therefore
+    this implementation covers the following commands:
+      - pip install . (which invokes bdist_wheel)
+      - python setup.py install (which invokes install command)
+      - python setup.py bdist_wheel (which invokes bdist_wheel command)
+    """
 
-  build command is also invoked from bdist_wheel and install command, therefore
-  this implementation covers the following commands:
-    - pip install . (which invokes bdist_wheel)
-    - python setup.py install (which invokes install command)
-    - python setup.py bdist_wheel (which invokes bdist_wheel command)
-  """
+    def _build_cc_extensions(self):
+        return True
 
-  def _build_cc_extensions(self):
-    return True
-  # Add "bazel_build" command as the first sub_command of "build". Each
-  # sub_command of "build" (e.g. "build_py", "build_ext", etc.) is executed
-  # sequentially when running a "build" command, if the second item in the tuple
-  # (predicate method) is evaluated to true.
-  sub_commands = [
-      ('bazel_build', _build_cc_extensions),
-  ] + build.build.sub_commands
+    # Add "bazel_build" command as the first sub_command of "build". Each
+    # sub_command of "build" (e.g. "build_py", "build_ext", etc.) is executed
+    # sequentially when running a "build" command, if the second item in the tuple
+    # (predicate method) is evaluated to true.
+    sub_commands = [
+        ("bazel_build", _build_cc_extensions),
+    ] + build.build.sub_commands
 
 
 # TFX BSL is not a purelib. However because of the extension module is not
 # built by setuptools, it will be incorrectly treated as a purelib. The
 # following works around that bug.
 class _InstallPlatlibCommand(install):
-
-  def finalize_options(self):
-    install.finalize_options(self)
-    self.install_lib = self.install_platlib
+    def finalize_options(self):
+        install.finalize_options(self)
+        self.install_lib = self.install_platlib
 
 
 class _BazelBuildCommand(setuptools.Command):
-  """Generate proto stub files in python.
-
-  Running this command will populate foo_pb2.py file next to your foo.proto
-  file.
-  """
-
-  def initialize_options(self):
-    pass
-
-  def finalize_options(self):
-    self._bazel_cmd = shutil.which('bazel')
-    if not self._bazel_cmd:
-      raise RuntimeError(
-          'Could not find "bazel" binary. Please visit '
-          'https://docs.bazel.build/versions/master/install.html for '
-          'installation instruction.')
-    self._additional_build_options = ['--verbose_failures', '--sandbox_debug']
-    if platform.system() == 'Darwin':
-      # This flag determines the platform qualifier of the macos wheel.
-      if platform.machine() == 'arm64':
-        self._additional_build_options = ['--macos_minimum_os=11.0',
-                                          '--config=macos_arm64']
-      else:
-        self._additional_build_options = ['--macos_minimum_os=10.14']
-
-  def run(self):
-    subprocess.check_call(
-        [self._bazel_cmd, 'run', '-c', 'opt']
-        + self._additional_build_options
-        + ['//tfx_bsl:move_generated_files'],
-        # Bazel should be invoked in a directory containing bazel WORKSPACE
-        # file, which is the root directory.
-        cwd=os.path.dirname(os.path.realpath(__file__)),
-        env=dict(os.environ, PYTHON_BIN_PATH=sys.executable),
-    )
+    """Generate proto stub files in python.
+
+    Running this command will populate foo_pb2.py file next to your foo.proto
+    file.
+    """
+
+    def initialize_options(self):
+        pass
+
+    def finalize_options(self):
+        self._bazel_cmd = shutil.which("bazel")
+        if not self._bazel_cmd:
+            raise RuntimeError(
+                'Could not find "bazel" binary. Please visit '
+                "https://docs.bazel.build/versions/master/install.html for "
+                "installation instruction."
+            )
+        self._additional_build_options = ["--verbose_failures", "--sandbox_debug"]
+        if platform.system() == "Darwin":
+            # This flag determines the platform qualifier of the macos wheel.
+            if platform.machine() == "arm64":
+                self._additional_build_options = [
+                    "--macos_minimum_os=11.0",
+                    "--config=macos_arm64",
+                ]
+            else:
+                self._additional_build_options = ["--macos_minimum_os=10.14"]
+
+    def run(self):
+        subprocess.check_call(
+            [self._bazel_cmd, "run", "-c", "opt"]
+            + self._additional_build_options
+            + ["//tfx_bsl:move_generated_files"],
+            # Bazel should be invoked in a directory containing bazel WORKSPACE
+            # file, which is the root directory.
+            cwd=os.path.dirname(os.path.realpath(__file__)),
+            env=dict(os.environ, PYTHON_BIN_PATH=sys.executable),
+        )
 
 
 class _BinaryDistribution(Distribution):
-  """This class is needed in order to create OS specific wheels."""
+    """This class is needed in order to create OS specific wheels."""
 
-  def is_pure(self):
-    return False
+    def is_pure(self):
+        return False
 
-  def has_ext_modules(self):
-    return True
+    def has_ext_modules(self):
+        return True
 
 
 def select_constraint(default, nightly=None, git_master=None):
-  """Select dependency constraint based on TFX_DEPENDENCY_SELECTOR env var."""
-  selector = os.environ.get('TFX_DEPENDENCY_SELECTOR')
-  if selector == 'UNCONSTRAINED':
-    return ''
-  elif selector == 'NIGHTLY' and nightly is not None:
-    return nightly
-  elif selector == 'GIT_MASTER' and git_master is not None:
-    return git_master
-  else:
-    return default
+    """Select dependency constraint based on TFX_DEPENDENCY_SELECTOR env var."""
+    selector = os.environ.get("TFX_DEPENDENCY_SELECTOR")
+    if selector == "UNCONSTRAINED":
+        return ""
+    elif selector == "NIGHTLY" and nightly is not None:
+        return nightly
+    elif selector == "GIT_MASTER" and git_master is not None:
+        return git_master
+    else:
+        return default
 
 
 # Get version from version module.
-with open('tfx_bsl/version.py') as fp:
-  globals_dict = {}
-  exec(fp.read(), globals_dict)  # pylint: disable=exec-used
-__version__ = globals_dict['__version__']
+with open("tfx_bsl/version.py") as fp:
+    globals_dict = {}
+    exec(fp.read(), globals_dict)  # pylint: disable=exec-used
+__version__ = globals_dict["__version__"]
 
 # Get the long description from the README file.
-with open('README.md') as fp:
-  _LONG_DESCRIPTION = fp.read()
+with open("README.md") as fp:
+    _LONG_DESCRIPTION = fp.read()
 
 setup(
-    name='tfx-bsl',
+    name="tfx-bsl",
     version=__version__,
-    author='Google LLC',
-    author_email='tensorflow-extended-dev@googlegroups.com',
-    license='Apache 2.0',
+    author="Google LLC",
+    author_email="tensorflow-extended-dev@googlegroups.com",
+    license="Apache 2.0",
     classifiers=[
-        'Development Status :: 5 - Production/Stable',
-        'Intended Audience :: Developers',
-        'Intended Audience :: Education',
-        'Intended Audience :: Science/Research',
-        'License :: OSI Approved :: Apache Software License',
-        'Operating System :: MacOS :: MacOS X',
-        'Operating System :: POSIX :: Linux',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.9',
-        'Programming Language :: Python :: 3.10',
-        'Programming Language :: Python :: 3.11',
-        'Programming Language :: Python :: 3 :: Only',
-        'Topic :: Scientific/Engineering',
-        'Topic :: Scientific/Engineering :: Artificial Intelligence',
-        'Topic :: Scientific/Engineering :: Mathematics',
-        'Topic :: Software Development',
-        'Topic :: Software Development :: Libraries',
-        'Topic :: Software Development :: Libraries :: Python Modules',
+        "Development Status :: 5 - Production/Stable",
+        "Intended Audience :: Developers",
+        "Intended Audience :: Education",
+        "Intended Audience :: Science/Research",
+        "License :: OSI Approved :: Apache Software License",
+        "Operating System :: MacOS :: MacOS X",
+        "Operating System :: POSIX :: Linux",
+        "Programming Language :: Python",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Programming Language :: Python :: 3 :: Only",
+        "Topic :: Scientific/Engineering",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+        "Topic :: Scientific/Engineering :: Mathematics",
+        "Topic :: Software Development",
+        "Topic :: Software Development :: Libraries",
+        "Topic :: Software Development :: Libraries :: Python Modules",
     ],
     namespace_packages=[],
     # Make sure to sync the versions of common dependencies (absl-py, numpy,
     # and protobuf) with TF.
     install_requires=[
-        'absl-py>=0.9,<2.0.0',
+        "absl-py>=0.9,<2.0.0",
         'apache-beam[gcp]>=2.53,<3;python_version>="3.11"',
         'apache-beam[gcp]>=2.50,<2.51;python_version<"3.11"',
-        'google-api-python-client>=1.7.11,<2',
-        'numpy>=1.22.0',
-        'pandas>=1.0,<2',
+        "google-api-python-client>=1.7.11,<2",
+        "numpy>=1.22.0",
+        "pandas>=1.0,<2",
         'protobuf>=4.25.2,<6.0.0;python_version>="3.11"',
         'protobuf>=4.21.6,<6.0.0;python_version<"3.11"',
-        'pyarrow>=10,<11',
-        'tensorflow>=2.17,<2.18',
-        'tensorflow-metadata'
+        "pyarrow>=10,<11",
+        "tensorflow>=2.17,<2.18",
+        "tensorflow-metadata"
         + select_constraint(
-            default='>=1.17.1,<1.18.0',
-            nightly='>=1.18.0.dev',
-            git_master='@git+https://github.com/tensorflow/metadata@master',
+            default=">=1.17.1,<1.18.0",
+            nightly=">=1.18.0.dev",
+            git_master="@git+https://github.com/tensorflow/metadata@master",
         ),
-        'tensorflow-serving-api'
+        "tensorflow-serving-api"
         + select_constraint(
-            default='>=2.13.0,<3',
-            nightly='>=2.13.0.dev',
-            git_master='@git+https://github.com/tensorflow/serving@master',
+            default=">=2.13.0,<3",
+            nightly=">=2.13.0.dev",
+            git_master="@git+https://github.com/tensorflow/serving@master",
         ),
     ],
-    python_requires='>=3.9,<4',
+    extras_require={
+        "dev": ["pre-commit"],
+    },
+    python_requires=">=3.9,<4",
     packages=find_packages(),
     include_package_data=True,
-    package_data={'': ['*.lib', '*.pyd', '*.so']},
+    package_data={"": ["*.lib", "*.pyd", "*.so"]},
     zip_safe=False,
     distclass=_BinaryDistribution,
     description=(
-        'tfx_bsl (TFX Basic Shared Libraries) contains libraries '
-        'shared by many TFX (TensorFlow eXtended) libraries and '
-        'components.'
+        "tfx_bsl (TFX Basic Shared Libraries) contains libraries "
+        "shared by many TFX (TensorFlow eXtended) libraries and "
+        "components."
     ),
     long_description=_LONG_DESCRIPTION,
-    long_description_content_type='text/markdown',
-    keywords='tfx bsl',
-    url='https://www.tensorflow.org/tfx',
-    download_url='https://github.com/tensorflow/tfx-bsl/tags',
+    long_description_content_type="text/markdown",
+    keywords="tfx bsl",
+    url="https://www.tensorflow.org/tfx",
+    download_url="https://github.com/tensorflow/tfx-bsl/tags",
     requires=[],
     cmdclass={
-        'install': _InstallPlatlibCommand,
-        'build': _BuildCommand,
-        'bazel_build': _BazelBuildCommand,
+        "install": _InstallPlatlibCommand,
+        "build": _BuildCommand,
+        "bazel_build": _BazelBuildCommand,
     },
 )
diff --git a/tfx_bsl/arrow/array_util.py b/tfx_bsl/arrow/array_util.py
index 9526a8b0..f7e6772b 100644
--- a/tfx_bsl/arrow/array_util.py
+++ b/tfx_bsl/arrow/array_util.py
@@ -13,180 +13,202 @@
 # limitations under the License.
 """Arrow Array utilities."""
 
-from typing import Tuple, Optional, Union
+from typing import Optional, Tuple, Union
 
 import numpy as np
 import pyarrow as pa
+
 # pytype: disable=import-error
 # pylint: disable=g-import-not-at-top
 # pylint: disable=unused-import
 # See b/148667210 for why the ImportError is ignored.
 try:
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import ListLengthsFromListArray
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import GetElementLengths
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import GetFlattenedArrayParentIndices
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import GetArrayNullBitmapAsByteArray
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import GetBinaryArrayTotalByteSize
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import IndexIn
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import MakeListArrayFromParentIndicesAndValues as _MakeListArrayFromParentIndicesAndValues
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import CooFromListArray
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import FillNullLists
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import GetByteSize
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import CountInvalidUTF8
+    from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import (
+        CooFromListArray,
+        CountInvalidUTF8,
+        FillNullLists,
+        GetArrayNullBitmapAsByteArray,
+        GetBinaryArrayTotalByteSize,
+        GetByteSize,
+        GetElementLengths,
+        GetFlattenedArrayParentIndices,
+        IndexIn,
+        ListLengthsFromListArray,
+    )
+    from tfx_bsl.cc.tfx_bsl_extension.arrow.array_util import (
+        MakeListArrayFromParentIndicesAndValues as _MakeListArrayFromParentIndicesAndValues,
+    )
 except ImportError:
-  import sys
-  sys.stderr.write("Error importing tfx_bsl_extension.arrow.array_util. "
-                   "Some tfx_bsl functionalities are not available")
+    import sys
+
+    sys.stderr.write(
+        "Error importing tfx_bsl_extension.arrow.array_util. "
+        "Some tfx_bsl functionalities are not available"
+    )
 # pytype: enable=import-error
 # pylint: enable=g-import-not-at-top
 # pylint: enable=unused-import
 
 
 def ToSingletonListArray(array: pa.Array) -> pa.Array:  # pylint: disable=invalid-name
-  """Converts an array of `type` to a `LargeListArray<type>`.
-
-  Where result[i] is null if array[i] is null; [array[i]] otherwise.
-
-  Args:
-    array: an arrow Array.
-
-  Returns:
-    a LargeListArray.
-  """
-  array_size = len(array)
-  # fast path: values are not copied.
-  if array.null_count == 0:
+    """Converts an array of `type` to a `LargeListArray<type>`.
+
+    Where result[i] is null if array[i] is null; [array[i]] otherwise.
+
+    Args:
+    ----
+      array: an arrow Array.
+
+    Returns:
+    -------
+      a LargeListArray.
+    """
+    array_size = len(array)
+    # fast path: values are not copied.
+    if array.null_count == 0:
+        return pa.LargeListArray.from_arrays(
+            pa.array(np.arange(0, array_size + 1, dtype=np.int32)), array
+        )
+
+    # null_mask[i] = 1 iff array[i] is null.
+    null_mask = np.asarray(GetArrayNullBitmapAsByteArray(array))
+    # presence_mask[i] = 0 iff array[i] is null
+    presence_mask = np.subtract(1, null_mask, dtype=np.uint8)
+    offsets_np = np.zeros((array_size + 1,), np.int32)
+    np.cumsum(presence_mask, out=offsets_np[1:])
+
+    # This is the null mask over offsets (but ListArray.from_arrays() uses it as
+    # the null mask for the ListArray), so its length is array_size +1, but the
+    # last element is always False.
+    list_array_null_mask = np.zeros((array_size + 1,), bool)
+    list_array_null_mask[:array_size] = null_mask.view(bool)
+    values_non_null = array.take(pa.array(np.flatnonzero(presence_mask)))
     return pa.LargeListArray.from_arrays(
-        pa.array(np.arange(0, array_size + 1, dtype=np.int32)), array)
-
-  # null_mask[i] = 1 iff array[i] is null.
-  null_mask = np.asarray(GetArrayNullBitmapAsByteArray(array))
-  # presence_mask[i] = 0 iff array[i] is null
-  presence_mask = np.subtract(1, null_mask, dtype=np.uint8)
-  offsets_np = np.zeros((array_size + 1,), np.int32)
-  np.cumsum(presence_mask, out=offsets_np[1:])
-
-  # This is the null mask over offsets (but ListArray.from_arrays() uses it as
-  # the null mask for the ListArray), so its length is array_size +1, but the
-  # last element is always False.
-  list_array_null_mask = np.zeros((array_size + 1,), bool)
-  list_array_null_mask[:array_size] = null_mask.view(bool)
-  values_non_null = array.take(pa.array(np.flatnonzero(presence_mask)))
-  return pa.LargeListArray.from_arrays(
-      pa.array(offsets_np, mask=list_array_null_mask), values_non_null)
-
-
-def MakeListArrayFromParentIndicesAndValues(num_parents: int,   # pylint: disable=invalid-name
-                                            parent_indices: pa.Array,
-                                            values: pa.Array,
-                                            empty_list_as_null: bool = True):
-  """Makes an Arrow LargeListArray from parent indices and values.
-
-  For example, if `num_parents = 6`, `parent_indices = [0, 1, 1, 3, 3]` and
-  `values` is (an arrow Array of) `[0, 1, 2, 3, 4]`, then the result will
-  be a `pa.LargeListArray` of integers:
-  `[[0], [1, 2], <empty_list>, [3, 4], <empty_list>]`
-  where `<empty_list>` is `null` if `empty_list_as_null` is True, or `[]` if
-  False.
-
-  Args:
-    num_parents: integer, number of sub-list. Must be greater than or equal to
-      `max(parent_indices) + 1`.
-    parent_indices: an int64 pa.Array. Must be sorted in increasing order.
-    values: a pa.Array. Its length must equal to the length of `parent_indices`.
-    empty_list_as_null: if True, empty sub-lists will become null elements
-      in the result ListArray. Otherwise they become empty sub-lists.
-
-  Returns:
-    A LargeListArray.
-  """
-  return _MakeListArrayFromParentIndicesAndValues(num_parents, parent_indices,
-                                                  values, empty_list_as_null)
+        pa.array(offsets_np, mask=list_array_null_mask), values_non_null
+    )
+
+
+def MakeListArrayFromParentIndicesAndValues(
+    num_parents: int,  # pylint: disable=invalid-name
+    parent_indices: pa.Array,
+    values: pa.Array,
+    empty_list_as_null: bool = True,
+):
+    """Makes an Arrow LargeListArray from parent indices and values.
+
+    For example, if `num_parents = 6`, `parent_indices = [0, 1, 1, 3, 3]` and
+    `values` is (an arrow Array of) `[0, 1, 2, 3, 4]`, then the result will
+    be a `pa.LargeListArray` of integers:
+    `[[0], [1, 2], <empty_list>, [3, 4], <empty_list>]`
+    where `<empty_list>` is `null` if `empty_list_as_null` is True, or `[]` if
+    False.
+
+    Args:
+    ----
+      num_parents: integer, number of sub-list. Must be greater than or equal to
+        `max(parent_indices) + 1`.
+      parent_indices: an int64 pa.Array. Must be sorted in increasing order.
+      values: a pa.Array. Its length must equal to the length of `parent_indices`.
+      empty_list_as_null: if True, empty sub-lists will become null elements
+        in the result ListArray. Otherwise they become empty sub-lists.
+
+    Returns:
+    -------
+      A LargeListArray.
+    """
+    return _MakeListArrayFromParentIndicesAndValues(
+        num_parents, parent_indices, values, empty_list_as_null
+    )
 
 
 def is_list_like(data_type: pa.DataType) -> bool:
-  """Returns true if an Arrow type is list-like."""
-  return pa.types.is_list(data_type) or pa.types.is_large_list(data_type)
+    """Returns true if an Arrow type is list-like."""
+    return pa.types.is_list(data_type) or pa.types.is_large_list(data_type)
 
 
 def get_innermost_nested_type(arrow_type: pa.DataType) -> pa.DataType:
-  """Returns the innermost type of a nested list type."""
-  while is_list_like(arrow_type):
-    arrow_type = arrow_type.value_type
-  return arrow_type
+    """Returns the innermost type of a nested list type."""
+    while is_list_like(arrow_type):
+        arrow_type = arrow_type.value_type
+    return arrow_type
 
 
 def flatten_nested(
     array: pa.Array, return_parent_indices: bool = False
-    ) -> Tuple[pa.Array, Optional[np.ndarray]]:
-  """Flattens all the list arrays nesting an array.
-
-  If `array` is not list-like, itself will be returned.
-
-  Args:
-    array: pa.Array to flatten.
-    return_parent_indices: If True, also returns the parent indices array.
-
-  Returns:
-    A tuple. The first term is the flattened array. The second term is None
-    if `return_parent_indices` is False; otherwise it's a parent indices array
-    parallel to the flattened array: if parent_indices[i] = j, then
-    flattened_array[i] belongs to the j-th element of the input array.
-  """
-  parent_indices = None
-
-  while is_list_like(array.type):
-    if return_parent_indices:
-      cur_parent_indices = GetFlattenedArrayParentIndices(
-          array).to_numpy()
-      if parent_indices is None:
-        parent_indices = cur_parent_indices
-      else:
-        parent_indices = parent_indices[cur_parent_indices]
-    array = array.flatten()
-
-  # the array is not nested at the first place.
-  if return_parent_indices and parent_indices is None:
-    parent_indices = np.arange(len(array))
-  return array, parent_indices
+) -> Tuple[pa.Array, Optional[np.ndarray]]:
+    """Flattens all the list arrays nesting an array.
+
+    If `array` is not list-like, itself will be returned.
+
+    Args:
+    ----
+      array: pa.Array to flatten.
+      return_parent_indices: If True, also returns the parent indices array.
+
+    Returns:
+    -------
+      A tuple. The first term is the flattened array. The second term is None
+      if `return_parent_indices` is False; otherwise it's a parent indices array
+      parallel to the flattened array: if parent_indices[i] = j, then
+      flattened_array[i] belongs to the j-th element of the input array.
+    """
+    parent_indices = None
+
+    while is_list_like(array.type):
+        if return_parent_indices:
+            cur_parent_indices = GetFlattenedArrayParentIndices(array).to_numpy()
+            if parent_indices is None:
+                parent_indices = cur_parent_indices
+            else:
+                parent_indices = parent_indices[cur_parent_indices]
+        array = array.flatten()
+
+    # the array is not nested at the first place.
+    if return_parent_indices and parent_indices is None:
+        parent_indices = np.arange(len(array))
+    return array, parent_indices
 
 
 def get_field(struct_array: pa.StructArray, field: Union[str, int]) -> pa.Array:
-  """Returns struct_array.field(field) with null propagation.
-
-  This function is equivalent to struct_array.field() but correctly handles
-  null propagation (the parent struct's null values are propagated to children).
-
-  Args:
-    struct_array: A struct array which should be queried.
-    field: The request field to retrieve.
-
-  Returns:
-    A pa.Array containing the requested field.
-
-  Raises:
-    KeyError: If field is not a child field in struct_array.
-  """
-  child_array = struct_array.field(field)
-
-  # In case all values are present then there's no need for special handling.
-  # We can return child_array as is to avoid a performance penalty caused by
-  # constructing and flattening the returned array.
-  if struct_array.null_count == 0:
-    return child_array
-  # is_valid returns a BooleanArray with two buffers the buffer at offset
-  # 0 is always None and buffer 1 contains the data on which fields are
-  # valid/not valid.
-  # (https://arrow.apache.org/docs/format/Columnar.html#buffer-listing-for-each-layout)
-  validity_bitmap_buffer = struct_array.is_valid().buffers()[1]
-
-  # Construct a new struct array with a single field.  Calling flatten() on the
-  # new array guarantees validity bitmaps are merged correctly.
-  new_type = pa.struct([pa.field(field, child_array.type)])
-  filtered_struct = pa.StructArray.from_buffers(
-      new_type,
-      len(struct_array), [validity_bitmap_buffer],
-      null_count=struct_array.null_count,
-      children=[child_array])
-  return filtered_struct.flatten()[0]
-
+    """Returns struct_array.field(field) with null propagation.
+
+    This function is equivalent to struct_array.field() but correctly handles
+    null propagation (the parent struct's null values are propagated to children).
+
+    Args:
+    ----
+      struct_array: A struct array which should be queried.
+      field: The request field to retrieve.
+
+    Returns:
+    -------
+      A pa.Array containing the requested field.
+
+    Raises:
+    ------
+      KeyError: If field is not a child field in struct_array.
+    """
+    child_array = struct_array.field(field)
+
+    # In case all values are present then there's no need for special handling.
+    # We can return child_array as is to avoid a performance penalty caused by
+    # constructing and flattening the returned array.
+    if struct_array.null_count == 0:
+        return child_array
+    # is_valid returns a BooleanArray with two buffers the buffer at offset
+    # 0 is always None and buffer 1 contains the data on which fields are
+    # valid/not valid.
+    # (https://arrow.apache.org/docs/format/Columnar.html#buffer-listing-for-each-layout)
+    validity_bitmap_buffer = struct_array.is_valid().buffers()[1]
+
+    # Construct a new struct array with a single field.  Calling flatten() on the
+    # new array guarantees validity bitmaps are merged correctly.
+    new_type = pa.struct([pa.field(field, child_array.type)])
+    filtered_struct = pa.StructArray.from_buffers(
+        new_type,
+        len(struct_array),
+        [validity_bitmap_buffer],
+        null_count=struct_array.null_count,
+        children=[child_array],
+    )
+    return filtered_struct.flatten()[0]
diff --git a/tfx_bsl/arrow/array_util_test.py b/tfx_bsl/arrow/array_util_test.py
index 9c146817..9e7036d1 100644
--- a/tfx_bsl/arrow/array_util_test.py
+++ b/tfx_bsl/arrow/array_util_test.py
@@ -17,13 +17,10 @@
 
 import numpy as np
 import pyarrow as pa
+from absl.testing import absltest, parameterized
 
 from tfx_bsl.arrow import array_util
 
-from absl.testing import absltest
-from absl.testing import parameterized
-
-
 _LIST_TYPE_PARAMETERS = [
     dict(testcase_name="list", list_type_factory=pa.list_),
     dict(testcase_name="large_list", list_type_factory=pa.large_list),
@@ -31,219 +28,245 @@
 
 
 class ArrayUtilTest(parameterized.TestCase):
+    def test_invalid_input_type(self):
+        functions_expecting_list_array = [
+            array_util.GetFlattenedArrayParentIndices,
+        ]
+        functions_expecting_array = [array_util.GetArrayNullBitmapAsByteArray]
+        functions_expecting_binary_array = [array_util.GetBinaryArrayTotalByteSize]
+        for f in itertools.chain(
+            functions_expecting_list_array,
+            functions_expecting_array,
+            functions_expecting_binary_array,
+        ):
+            with self.assertRaises((TypeError, RuntimeError)):
+                f(1)
+
+        for f in functions_expecting_list_array:
+            with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
+                f(pa.array([1, 2, 3]))
+
+        for f in functions_expecting_binary_array:
+            with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
+                f(pa.array([[1, 2, 3]]))
+
+    @parameterized.named_parameters(*_LIST_TYPE_PARAMETERS)
+    def test_list_lengths(self, list_type_factory):
+        list_lengths = array_util.ListLengthsFromListArray(
+            pa.array([], type=list_type_factory(pa.int64()))
+        )
+        self.assertTrue(list_lengths.equals(pa.array([], type=pa.int64())))
+        list_lengths = array_util.ListLengthsFromListArray(
+            pa.array([[1.0, 2.0], [], [3.0]], type=list_type_factory(pa.float32()))
+        )
+        self.assertTrue(list_lengths.equals(pa.array([2, 0, 1], type=pa.int64())))
+        list_lengths = array_util.ListLengthsFromListArray(
+            pa.array([[1.0, 2.0], None, [3.0]], type=list_type_factory(pa.float64()))
+        )
+        self.assertTrue(list_lengths.equals(pa.array([2, 0, 1], type=pa.int64())))
+
+    @parameterized.named_parameters(*_LIST_TYPE_PARAMETERS)
+    def test_element_lengths_list_array(self, list_type_factory):
+        list_lengths = array_util.GetElementLengths(
+            pa.array([], type=list_type_factory(pa.int64()))
+        )
+        self.assertTrue(list_lengths.equals(pa.array([], type=pa.int64())))
+        list_lengths = array_util.GetElementLengths(
+            pa.array([[1.0, 2.0], [], [3.0]], list_type_factory(pa.float32()))
+        )
+        self.assertTrue(list_lengths.equals(pa.array([2, 0, 1], type=pa.int64())))
+        list_lengths = array_util.GetElementLengths(
+            pa.array([[1.0, 2.0], None, [3.0]], list_type_factory(pa.float64()))
+        )
+        self.assertTrue(list_lengths.equals(pa.array([2, 0, 1], type=pa.int64())))
+
+    @parameterized.named_parameters(
+        *[
+            dict(testcase_name="binary", binary_like_type=pa.binary()),
+            dict(testcase_name="string", binary_like_type=pa.string()),
+            dict(testcase_name="large_binary", binary_like_type=pa.large_binary()),
+            dict(testcase_name="large_string", binary_like_type=pa.large_string()),
+        ]
+    )
+    def test_element_lengths_binary_like(self, binary_like_type):
+        list_lengths = array_util.GetElementLengths(
+            pa.array([b"a", b"bb", None, b"", b"ccc"], type=binary_like_type)
+        )
+        self.assertTrue(list_lengths.equals(pa.array([1, 2, 0, 0, 3], type=pa.int64())))
+
+    def test_element_lengths_unsupported_type(self):
+        with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
+            array_util.GetElementLengths(pa.array([1, 2, 3], type=pa.int32()))
+
+    def test_get_array_null_bitmap_as_byte_array(self):
+        array = pa.array([], type=pa.int32())
+        null_masks = array_util.GetArrayNullBitmapAsByteArray(array)
+        self.assertTrue(null_masks.equals(pa.array([], type=pa.uint8())))
+
+        array = pa.array([1, 2, None, 3, None], type=pa.int32())
+        null_masks = array_util.GetArrayNullBitmapAsByteArray(array)
+        self.assertTrue(null_masks.equals(pa.array([0, 0, 1, 0, 1], type=pa.uint8())))
+
+        array = pa.array([1, 2, 3])
+        null_masks = array_util.GetArrayNullBitmapAsByteArray(array)
+        self.assertTrue(null_masks.equals(pa.array([0, 0, 0], type=pa.uint8())))
+
+        array = pa.array([None, None, None], type=pa.int32())
+        null_masks = array_util.GetArrayNullBitmapAsByteArray(array)
+        self.assertTrue(null_masks.equals(pa.array([1, 1, 1], type=pa.uint8())))
+        # Demonstrate that the returned array can be converted to a numpy boolean
+        # array w/o copying
+        np.testing.assert_equal(
+            np.array([True, True, True]), null_masks.to_numpy().view(bool)
+        )
 
-  def test_invalid_input_type(self):
+    @parameterized.named_parameters(
+        *[
+            dict(
+                testcase_name="list",
+                list_type_factory=pa.list_,
+                parent_indices_type=pa.int32(),
+            ),
+            dict(
+                testcase_name="large_list",
+                list_type_factory=pa.large_list,
+                parent_indices_type=pa.int64(),
+            ),
+        ]
+    )
+    def test_get_flattened_array_parent_indices(
+        self, list_type_factory, parent_indices_type
+    ):
+        indices = array_util.GetFlattenedArrayParentIndices(
+            pa.array([], type=list_type_factory(pa.int32()))
+        )
+        self.assertTrue(indices.equals(pa.array([], type=parent_indices_type)))
 
-    functions_expecting_list_array = [
-        array_util.GetFlattenedArrayParentIndices,
-    ]
-    functions_expecting_array = [array_util.GetArrayNullBitmapAsByteArray]
-    functions_expecting_binary_array = [array_util.GetBinaryArrayTotalByteSize]
-    for f in itertools.chain(functions_expecting_list_array,
-                             functions_expecting_array,
-                             functions_expecting_binary_array):
-      with self.assertRaises((TypeError, RuntimeError)):
-        f(1)
-
-    for f in functions_expecting_list_array:
-      with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
-        f(pa.array([1, 2, 3]))
-
-    for f in functions_expecting_binary_array:
-      with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
-        f(pa.array([[1, 2, 3]]))
-
-  @parameterized.named_parameters(*_LIST_TYPE_PARAMETERS)
-  def test_list_lengths(self, list_type_factory):
-    list_lengths = array_util.ListLengthsFromListArray(
-        pa.array([], type=list_type_factory(pa.int64())))
-    self.assertTrue(list_lengths.equals(pa.array([], type=pa.int64())))
-    list_lengths = array_util.ListLengthsFromListArray(
-        pa.array([[1., 2.], [], [3.]], type=list_type_factory(pa.float32())))
-    self.assertTrue(list_lengths.equals(pa.array([2, 0, 1], type=pa.int64())))
-    list_lengths = array_util.ListLengthsFromListArray(
-        pa.array([[1., 2.], None, [3.]], type=list_type_factory(pa.float64())))
-    self.assertTrue(list_lengths.equals(pa.array([2, 0, 1], type=pa.int64())))
-
-  @parameterized.named_parameters(*_LIST_TYPE_PARAMETERS)
-  def test_element_lengths_list_array(self, list_type_factory):
-    list_lengths = array_util.GetElementLengths(
-        pa.array([], type=list_type_factory(pa.int64())))
-    self.assertTrue(list_lengths.equals(pa.array([], type=pa.int64())))
-    list_lengths = array_util.GetElementLengths(
-        pa.array([[1., 2.], [], [3.]], list_type_factory(pa.float32())))
-    self.assertTrue(list_lengths.equals(pa.array([2, 0, 1], type=pa.int64())))
-    list_lengths = array_util.GetElementLengths(
-        pa.array([[1., 2.], None, [3.]], list_type_factory(pa.float64())))
-    self.assertTrue(list_lengths.equals(pa.array([2, 0, 1], type=pa.int64())))
-
-  @parameterized.named_parameters(*[
-      dict(testcase_name="binary", binary_like_type=pa.binary()),
-      dict(testcase_name="string", binary_like_type=pa.string()),
-      dict(testcase_name="large_binary", binary_like_type=pa.large_binary()),
-      dict(testcase_name="large_string", binary_like_type=pa.large_string()),
-  ])
-  def test_element_lengths_binary_like(self, binary_like_type):
-
-    list_lengths = array_util.GetElementLengths(
-        pa.array([b"a", b"bb", None, b"", b"ccc"], type=binary_like_type))
-    self.assertTrue(list_lengths.equals(pa.array([1, 2, 0, 0, 3],
-                                                 type=pa.int64())))
-
-  def test_element_lengths_unsupported_type(self):
-    with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
-      array_util.GetElementLengths(pa.array([1, 2, 3], type=pa.int32()))
-
-  def test_get_array_null_bitmap_as_byte_array(self):
-    array = pa.array([], type=pa.int32())
-    null_masks = array_util.GetArrayNullBitmapAsByteArray(array)
-    self.assertTrue(null_masks.equals(pa.array([], type=pa.uint8())))
-
-    array = pa.array([1, 2, None, 3, None], type=pa.int32())
-    null_masks = array_util.GetArrayNullBitmapAsByteArray(array)
-    self.assertTrue(
-        null_masks.equals(pa.array([0, 0, 1, 0, 1], type=pa.uint8())))
-
-    array = pa.array([1, 2, 3])
-    null_masks = array_util.GetArrayNullBitmapAsByteArray(array)
-    self.assertTrue(null_masks.equals(pa.array([0, 0, 0], type=pa.uint8())))
-
-    array = pa.array([None, None, None], type=pa.int32())
-    null_masks = array_util.GetArrayNullBitmapAsByteArray(array)
-    self.assertTrue(null_masks.equals(pa.array([1, 1, 1], type=pa.uint8())))
-    # Demonstrate that the returned array can be converted to a numpy boolean
-    # array w/o copying
-    np.testing.assert_equal(
-        np.array([True, True, True]), null_masks.to_numpy().view(bool))
-
-  @parameterized.named_parameters(*[
-      dict(
-          testcase_name="list",
-          list_type_factory=pa.list_,
-          parent_indices_type=pa.int32()),
-      dict(
-          testcase_name="large_list",
-          list_type_factory=pa.large_list,
-          parent_indices_type=pa.int64()),
-  ])
-  def test_get_flattened_array_parent_indices(self, list_type_factory,
-                                              parent_indices_type):
-    indices = array_util.GetFlattenedArrayParentIndices(
-        pa.array([], type=list_type_factory(pa.int32())))
-    self.assertTrue(indices.equals(pa.array([], type=parent_indices_type)))
-
-    indices = array_util.GetFlattenedArrayParentIndices(
-        pa.array([[1.], [2.], [], [3., 4.]],
-                 type=list_type_factory(pa.float32())))
-    self.assertTrue(
-        indices.equals(pa.array([0, 1, 3, 3], type=parent_indices_type)))
-
-    indices = array_util.GetFlattenedArrayParentIndices(
-        pa.array([[1.], [2.], [], [3., 4.]],
-                 type=list_type_factory(pa.float32())).slice(1))
-    self.assertTrue(
-        indices.equals(pa.array([0, 2, 2], type=parent_indices_type)))
-
-    indices = array_util.GetFlattenedArrayParentIndices(
-        pa.array([list(range(1024))],
-                 type=list_type_factory(pa.int64())))
-    self.assertTrue(
-        indices.equals(pa.array([0] * 1024, type=parent_indices_type)))
-
-  @parameterized.named_parameters(*[
-      dict(testcase_name="binary", binary_like_type=pa.binary()),
-      dict(testcase_name="string", binary_like_type=pa.string()),
-      dict(testcase_name="large_binary", binary_like_type=pa.large_binary()),
-      dict(testcase_name="large_string", binary_like_type=pa.large_string()),
-  ])
-  def test_get_binary_array_total_byte_size(self, binary_like_type):
-    array = pa.array([b"abc", None, b"def", b"", b"ghi"], type=binary_like_type)
-    self.assertEqual(9, array_util.GetBinaryArrayTotalByteSize(array))
-    sliced_1_2 = array.slice(1, 2)
-    self.assertEqual(3, array_util.GetBinaryArrayTotalByteSize(sliced_1_2))
-    sliced_2 = array.slice(2)
-    self.assertEqual(6, array_util.GetBinaryArrayTotalByteSize(sliced_2))
-
-    empty_array = pa.array([], type=binary_like_type)
-    self.assertEqual(0, array_util.GetBinaryArrayTotalByteSize(empty_array))
-
-  def test_indexin_integer(self):
-    values = pa.array([99, 42, 3, None])
-    # TODO(b/203116559): Change this back to [3, 3, 99] once arrow >= 5.0
-    # is required by TFDV.
-    value_set = pa.array([3, 4, 99])
-    actual = array_util.IndexIn(values, value_set)
-    actual.validate()
-    self.assertTrue(
-        actual.equals(pa.array([2, None, 0, None], type=pa.int32())))
-
-  @parameterized.parameters(
-      *(list(
-          itertools.product([pa.binary(), pa.large_binary()],
-                            [pa.binary(), pa.large_binary()])) +
-        list(
-            itertools.product([pa.string(), pa.large_string()],
-                              [pa.string(), pa.large_string()]))))
-  def test_indexin_binary_alike(self, values_type, value_set_type):
-    # Case #1: value_set does not contain null.
-    values = pa.array(["aa", "bb", "cc", None], values_type)
-    value_set = pa.array(["cc", "cc", "aa"], value_set_type)
-    actual = array_util.IndexIn(values, value_set)
-    actual.validate()
-    self.assertTrue(
-        actual.equals(pa.array([1, None, 0, None], type=pa.int32())),
-        "actual: {}".format(actual))
-
-    # Case #2: value_set contains nulls.
-    values = pa.array(["aa", "bb", "cc", None], values_type)
-    value_set = pa.array(["cc", None, None, "bb"], value_set_type)
-    actual = array_util.IndexIn(values, value_set)
-    actual.validate()
-    self.assertTrue(
-        actual.equals(pa.array([None, 2, 0, 1], type=pa.int32())),
-        "actual: {}".format(actual))
-
-  def test_is_list_like(self):
-    for t in (pa.list_(pa.int64()), pa.large_list(pa.int64())):
-      self.assertTrue(array_util.is_list_like(t))
-
-    for t in (pa.binary(), pa.int64(), pa.large_string()):
-      self.assertFalse(array_util.is_list_like(t))
-
-  def test_get_innermost_nested_type_nested_input(self):
-    for inner_type in pa.int64(), pa.float32(), pa.binary():
-      for t in (pa.list_(inner_type), pa.large_list(inner_type)):
+        indices = array_util.GetFlattenedArrayParentIndices(
+            pa.array(
+                [[1.0], [2.0], [], [3.0, 4.0]], type=list_type_factory(pa.float32())
+            )
+        )
         self.assertTrue(
-            array_util.get_innermost_nested_type(t).equals(inner_type)
+            indices.equals(pa.array([0, 1, 3, 3], type=parent_indices_type))
         )
 
-  def test_get_innermost_nested_type_non_nested_input(self):
-    for t in pa.int64(), pa.float32(), pa.binary():
-      self.assertTrue(array_util.get_innermost_nested_type(t).equals(t))
+        indices = array_util.GetFlattenedArrayParentIndices(
+            pa.array(
+                [[1.0], [2.0], [], [3.0, 4.0]], type=list_type_factory(pa.float32())
+            ).slice(1)
+        )
+        self.assertTrue(indices.equals(pa.array([0, 2, 2], type=parent_indices_type)))
 
-  def test_flatten_nested(self):
-    input_array = pa.array([[[1, 2]], None, [None, [3]]])
-    flattened, parent_indices = array_util.flatten_nested(
-        input_array, return_parent_indices=False
+        indices = array_util.GetFlattenedArrayParentIndices(
+            pa.array([list(range(1024))], type=list_type_factory(pa.int64()))
+        )
+        self.assertTrue(indices.equals(pa.array([0] * 1024, type=parent_indices_type)))
+
+    @parameterized.named_parameters(
+        *[
+            dict(testcase_name="binary", binary_like_type=pa.binary()),
+            dict(testcase_name="string", binary_like_type=pa.string()),
+            dict(testcase_name="large_binary", binary_like_type=pa.large_binary()),
+            dict(testcase_name="large_string", binary_like_type=pa.large_string()),
+        ]
     )
-    expected = pa.array([1, 2, 3])
-    expected_parent_indices = [0, 0, 2]
-    self.assertIs(parent_indices, None)
-    self.assertTrue(flattened.equals(expected))
-
-    flattened, parent_indices = array_util.flatten_nested(
-        input_array, return_parent_indices=True
+    def test_get_binary_array_total_byte_size(self, binary_like_type):
+        array = pa.array([b"abc", None, b"def", b"", b"ghi"], type=binary_like_type)
+        self.assertEqual(9, array_util.GetBinaryArrayTotalByteSize(array))
+        sliced_1_2 = array.slice(1, 2)
+        self.assertEqual(3, array_util.GetBinaryArrayTotalByteSize(sliced_1_2))
+        sliced_2 = array.slice(2)
+        self.assertEqual(6, array_util.GetBinaryArrayTotalByteSize(sliced_2))
+
+        empty_array = pa.array([], type=binary_like_type)
+        self.assertEqual(0, array_util.GetBinaryArrayTotalByteSize(empty_array))
+
+    def test_indexin_integer(self):
+        values = pa.array([99, 42, 3, None])
+        # TODO(b/203116559): Change this back to [3, 3, 99] once arrow >= 5.0
+        # is required by TFDV.
+        value_set = pa.array([3, 4, 99])
+        actual = array_util.IndexIn(values, value_set)
+        actual.validate()
+        self.assertTrue(actual.equals(pa.array([2, None, 0, None], type=pa.int32())))
+
+    @parameterized.parameters(
+        *(
+            list(
+                itertools.product(
+                    [pa.binary(), pa.large_binary()], [pa.binary(), pa.large_binary()]
+                )
+            )
+            + list(
+                itertools.product(
+                    [pa.string(), pa.large_string()], [pa.string(), pa.large_string()]
+                )
+            )
+        )
     )
-    self.assertTrue(flattened.equals(expected))
-    np.testing.assert_array_equal(parent_indices, expected_parent_indices)
+    def test_indexin_binary_alike(self, values_type, value_set_type):
+        # Case #1: value_set does not contain null.
+        values = pa.array(["aa", "bb", "cc", None], values_type)
+        value_set = pa.array(["cc", "cc", "aa"], value_set_type)
+        actual = array_util.IndexIn(values, value_set)
+        actual.validate()
+        self.assertTrue(
+            actual.equals(pa.array([1, None, 0, None], type=pa.int32())),
+            f"actual: {actual}",
+        )
 
-  def test_flatten_nested_non_list(self):
-    input_array = pa.array([1, 2])
-    flattened, parent_indices = array_util.flatten_nested(
-        input_array, return_parent_indices=True
-    )
-    self.assertTrue(flattened.equals(pa.array([1, 2])))
-    np.testing.assert_array_equal(parent_indices, [0, 1])
+        # Case #2: value_set contains nulls.
+        values = pa.array(["aa", "bb", "cc", None], values_type)
+        value_set = pa.array(["cc", None, None, "bb"], value_set_type)
+        actual = array_util.IndexIn(values, value_set)
+        actual.validate()
+        self.assertTrue(
+            actual.equals(pa.array([None, 2, 0, 1], type=pa.int32())),
+            f"actual: {actual}",
+        )
+
+    def test_is_list_like(self):
+        for t in (pa.list_(pa.int64()), pa.large_list(pa.int64())):
+            self.assertTrue(array_util.is_list_like(t))
+
+        for t in (pa.binary(), pa.int64(), pa.large_string()):
+            self.assertFalse(array_util.is_list_like(t))
+
+    def test_get_innermost_nested_type_nested_input(self):
+        for inner_type in pa.int64(), pa.float32(), pa.binary():
+            for t in (pa.list_(inner_type), pa.large_list(inner_type)):
+                self.assertTrue(
+                    array_util.get_innermost_nested_type(t).equals(inner_type)
+                )
+
+    def test_get_innermost_nested_type_non_nested_input(self):
+        for t in pa.int64(), pa.float32(), pa.binary():
+            self.assertTrue(array_util.get_innermost_nested_type(t).equals(t))
+
+    def test_flatten_nested(self):
+        input_array = pa.array([[[1, 2]], None, [None, [3]]])
+        flattened, parent_indices = array_util.flatten_nested(
+            input_array, return_parent_indices=False
+        )
+        expected = pa.array([1, 2, 3])
+        expected_parent_indices = [0, 0, 2]
+        self.assertIs(parent_indices, None)
+        self.assertTrue(flattened.equals(expected))
+
+        flattened, parent_indices = array_util.flatten_nested(
+            input_array, return_parent_indices=True
+        )
+        self.assertTrue(flattened.equals(expected))
+        np.testing.assert_array_equal(parent_indices, expected_parent_indices)
+
+    def test_flatten_nested_non_list(self):
+        input_array = pa.array([1, 2])
+        flattened, parent_indices = array_util.flatten_nested(
+            input_array, return_parent_indices=True
+        )
+        self.assertTrue(flattened.equals(pa.array([1, 2])))
+        np.testing.assert_array_equal(parent_indices, [0, 1])
 
 
 _MAKE_LIST_ARRAY_INVALID_INPUT_TEST_CASES = [
@@ -253,15 +276,15 @@ def test_flatten_nested_non_list(self):
         parent_indices=pa.array([0], type=pa.int32()),
         values=pa.array([1]),
         expected_error=RuntimeError,
-        expected_error_regexp="must be int64"
-        ),
+        expected_error_regexp="must be int64",
+    ),
     dict(
         testcase_name="parent_indices_length_not_equal_to_values_length",
         num_parents=1,
         parent_indices=pa.array([0], type=pa.int64()),
         values=pa.array([1, 2]),
         expected_error=RuntimeError,
-        expected_error_regexp="values array and parent indices array must be of the same length"
+        expected_error_regexp="values array and parent indices array must be of the same length",
     ),
     dict(
         testcase_name="num_parents_too_small",
@@ -269,8 +292,8 @@ def test_flatten_nested_non_list(self):
         parent_indices=pa.array([1], type=pa.int64()),
         values=pa.array([1]),
         expected_error=RuntimeError,
-        expected_error_regexp="Found a parent index 1 while num_parents was 1"
-        )
+        expected_error_regexp="Found a parent index 1 while num_parents was 1",
+    ),
 ]
 
 
@@ -281,8 +304,10 @@ def test_flatten_nested_non_list(self):
         parent_indices=pa.array([], type=pa.int64()),
         values=pa.array([], type=pa.int64()),
         empty_list_as_null=True,
-        expected=pa.array([None, None, None, None, None],
-                          type=pa.large_list(pa.int64()))),
+        expected=pa.array(
+            [None, None, None, None, None], type=pa.large_list(pa.int64())
+        ),
+    ),
     dict(
         testcase_name="leading_nulls",
         num_parents=3,
@@ -297,16 +322,18 @@ def test_flatten_nested_non_list(self):
         parent_indices=pa.array([0, 0, 0, 3, 3], type=pa.int64()),
         values=pa.array(["a", "b", "c", "d", "e"], type=pa.binary()),
         empty_list_as_null=True,
-        expected=pa.array([["a", "b", "c"], None, None, ["d", "e"]],
-                          type=pa.large_list(pa.binary()))),
+        expected=pa.array(
+            [["a", "b", "c"], None, None, ["d", "e"]], type=pa.large_list(pa.binary())
+        ),
+    ),
     dict(
         testcase_name="parents_are_all_empty",
         num_parents=5,
         parent_indices=pa.array([], type=pa.int64()),
         values=pa.array([], type=pa.int64()),
         empty_list_as_null=False,
-        expected=pa.array([[], [], [], [], []],
-                          type=pa.large_list(pa.int64()))),
+        expected=pa.array([[], [], [], [], []], type=pa.large_list(pa.int64())),
+    ),
     dict(
         testcase_name="leading_empties",
         num_parents=3,
@@ -321,32 +348,36 @@ def test_flatten_nested_non_list(self):
         parent_indices=pa.array([0, 0, 0, 3, 3], type=pa.int64()),
         values=pa.array(["a", "b", "c", "d", "e"], type=pa.binary()),
         empty_list_as_null=False,
-        expected=pa.array([["a", "b", "c"], [], [], ["d", "e"]],
-                          type=pa.large_list(pa.binary())),
+        expected=pa.array(
+            [["a", "b", "c"], [], [], ["d", "e"]], type=pa.large_list(pa.binary())
         ),
+    ),
 ]
 
 
 class MakeListArrayFromParentIndicesAndValuesTest(parameterized.TestCase):
-
-  @parameterized.named_parameters(*_MAKE_LIST_ARRAY_INVALID_INPUT_TEST_CASES)
-  def testInvalidInput(self, num_parents, parent_indices, values,
-                       expected_error, expected_error_regexp):
-    with self.assertRaisesRegex(expected_error, expected_error_regexp):
-      array_util.MakeListArrayFromParentIndicesAndValues(
-          num_parents, parent_indices, values)
-
-  @parameterized.named_parameters(*_MAKE_LIST_ARRAY_TEST_CASES)
-  def testMakeListArray(self, num_parents, parent_indices, values,
-                        empty_list_as_null, expected):
-    actual = array_util.MakeListArrayFromParentIndicesAndValues(
-        num_parents, parent_indices, values, empty_list_as_null)
-    actual.validate()
-    if not empty_list_as_null:
-      self.assertEqual(actual.null_count, 0)
-    self.assertTrue(
-        actual.equals(expected),
-        "actual: {}, expected: {}".format(actual, expected))
+    @parameterized.named_parameters(*_MAKE_LIST_ARRAY_INVALID_INPUT_TEST_CASES)
+    def testInvalidInput(
+        self, num_parents, parent_indices, values, expected_error, expected_error_regexp
+    ):
+        with self.assertRaisesRegex(expected_error, expected_error_regexp):
+            array_util.MakeListArrayFromParentIndicesAndValues(
+                num_parents, parent_indices, values
+            )
+
+    @parameterized.named_parameters(*_MAKE_LIST_ARRAY_TEST_CASES)
+    def testMakeListArray(
+        self, num_parents, parent_indices, values, empty_list_as_null, expected
+    ):
+        actual = array_util.MakeListArrayFromParentIndicesAndValues(
+            num_parents, parent_indices, values, empty_list_as_null
+        )
+        actual.validate()
+        if not empty_list_as_null:
+            self.assertEqual(actual.null_count, 0)
+        self.assertTrue(
+            actual.equals(expected), f"actual: {actual}, expected: {expected}"
+        )
 
 
 _COO_FROM_LIST_ARRAY_TEST_CASES = [
@@ -369,15 +400,15 @@ def testMakeListArray(self, num_parents, parent_indices, values,
         list_array=[[]],
         expected_coo=[],
         expected_dense_shape=[1, 0],
-        array_types=[pa.list_(pa.int64()),
-                     pa.large_list(pa.string())]),
+        array_types=[pa.list_(pa.int64()), pa.large_list(pa.string())],
+    ),
     dict(
         testcase_name="2d_ragged",
         list_array=[["a", "b"], ["c"], [], ["d", "e"]],
         expected_coo=[0, 0, 0, 1, 1, 0, 3, 0, 3, 1],
         expected_dense_shape=[4, 2],
-        array_types=[pa.list_(pa.string()),
-                     pa.large_list(pa.large_binary())]),
+        array_types=[pa.list_(pa.string()), pa.large_list(pa.large_binary())],
+    ),
     dict(
         testcase_name="3d_ragged",
         list_array=[[["a", "b"], ["c"]], [[], ["d", "e"]]],
@@ -394,26 +425,27 @@ def testMakeListArray(self, num_parents, parent_indices, values,
 
 
 class CooFromListArrayTest(parameterized.TestCase):
-
-  @parameterized.named_parameters(*_COO_FROM_LIST_ARRAY_TEST_CASES)
-  def testCooFromListArray(
-      self, list_array, expected_coo, expected_dense_shape, array_types):
-
-    for array_type in array_types:
-      for input_array in [
-          pa.array(list_array, type=array_type),
-          # it should work for sliced arrays.
-          pa.array(list_array + list_array,
-                   type=array_type).slice(0, len(list_array)),
-          pa.array(list_array + list_array,
-                   type=array_type).slice(len(list_array)),
-      ]:
-        coo, dense_shape = array_util.CooFromListArray(input_array)
-        self.assertTrue(coo.type.equals(pa.int64()))
-        self.assertTrue(dense_shape.type.equals(pa.int64()))
-
-        self.assertEqual(expected_coo, coo.to_pylist())
-        self.assertEqual(expected_dense_shape, dense_shape.to_pylist())
+    @parameterized.named_parameters(*_COO_FROM_LIST_ARRAY_TEST_CASES)
+    def testCooFromListArray(
+        self, list_array, expected_coo, expected_dense_shape, array_types
+    ):
+        for array_type in array_types:
+            for input_array in [
+                pa.array(list_array, type=array_type),
+                # it should work for sliced arrays.
+                pa.array(list_array + list_array, type=array_type).slice(
+                    0, len(list_array)
+                ),
+                pa.array(list_array + list_array, type=array_type).slice(
+                    len(list_array)
+                ),
+            ]:
+                coo, dense_shape = array_util.CooFromListArray(input_array)
+                self.assertTrue(coo.type.equals(pa.int64()))
+                self.assertTrue(dense_shape.type.equals(pa.int64()))
+
+                self.assertEqual(expected_coo, coo.to_pylist())
+                self.assertEqual(expected_dense_shape, dense_shape.to_pylist())
 
 
 _FILL_NULL_LISTS_TEST_CASES = [
@@ -465,114 +497,133 @@ def testCooFromListArray(
         value_type=pa.large_binary(),
         fill_with=["x", "x"],
         expected=[["a"], ["b"], ["c"], ["x", "x"], ["d"], ["x", "x"], ["e"]],
-    )
+    ),
 ]
 
 
 def _cross_named_parameters(*named_parameters_dicts):
-  result = []
-  for product in itertools.product(*named_parameters_dicts):
-    crossed = dict(product[0])
-    testcase_name = crossed["testcase_name"]
-    for d in product[1:]:
-      testcase_name += "_" + d["testcase_name"]
-      crossed.update(d)
-    crossed["testcase_name"] = testcase_name
-    result.append(crossed)
-  return result
+    result = []
+    for product in itertools.product(*named_parameters_dicts):
+        crossed = dict(product[0])
+        testcase_name = crossed["testcase_name"]
+        for d in product[1:]:
+            testcase_name += "_" + d["testcase_name"]
+            crossed.update(d)
+        crossed["testcase_name"] = testcase_name
+        result.append(crossed)
+    return result
 
 
 class FillNullListsTest(parameterized.TestCase):
+    @parameterized.named_parameters(
+        *_cross_named_parameters(_FILL_NULL_LISTS_TEST_CASES, _LIST_TYPE_PARAMETERS)
+    )
+    def testFillNullLists(
+        self, list_array, value_type, fill_with, expected, list_type_factory
+    ):
+        actual = array_util.FillNullLists(
+            pa.array(list_array, type=list_type_factory(value_type)),
+            pa.array(fill_with, type=value_type),
+        )
+        self.assertTrue(
+            actual.equals(pa.array(expected, type=list_type_factory(value_type))),
+            f"{actual} vs {expected}",
+        )
 
-  @parameterized.named_parameters(*_cross_named_parameters(
-      _FILL_NULL_LISTS_TEST_CASES, _LIST_TYPE_PARAMETERS))
-  def testFillNullLists(
-      self, list_array, value_type, fill_with, expected, list_type_factory):
-    actual = array_util.FillNullLists(
-        pa.array(list_array, type=list_type_factory(value_type)),
-        pa.array(fill_with, type=value_type))
-    self.assertTrue(
-        actual.equals(pa.array(expected, type=list_type_factory(value_type))),
-        "{} vs {}".format(actual, expected))
-
-  def testNonListArray(self):
-    with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
-      array_util.FillNullLists(pa.array([1, 2, 3]), pa.array([4]))
+    def testNonListArray(self):
+        with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
+            array_util.FillNullLists(pa.array([1, 2, 3]), pa.array([4]))
 
-  def testValueTypeDoesNotEqualFillType(self):
-    with self.assertRaisesRegex(RuntimeError, "to be of the same type"):
-      array_util.FillNullLists(pa.array([[1]]), pa.array(["a"]))
+    def testValueTypeDoesNotEqualFillType(self):
+        with self.assertRaisesRegex(RuntimeError, "to be of the same type"):
+            array_util.FillNullLists(pa.array([[1]]), pa.array(["a"]))
 
 
 def _all_false_null_bitmap_size(size):
-  if pa.__version__ < "0.17":
-    return size
-  # starting from arrow 0.17, the array factory won't create a null bitmap if
-  # no element is null.
-  # TODO(zhuo): clean up this shim once tfx_bsl supports arrow 0.17+
-  # exclusively.
-  return 0
+    if pa.__version__ < "0.17":
+        return size
+    # starting from arrow 0.17, the array factory won't create a null bitmap if
+    # no element is null.
+    # TODO(zhuo): clean up this shim once tfx_bsl supports arrow 0.17+
+    # exclusively.
+    return 0
 
 
 def _get_numeric_byte_size_test_cases():
-  result = []
-  for array_type, sizeof in [
-      (pa.int8(), 1),
-      (pa.uint8(), 1),
-      (pa.int16(), 2),
-      (pa.uint16(), 2),
-      (pa.int32(), 4),
-      (pa.uint32(), 4),
-      (pa.int64(), 8),
-      (pa.uint64(), 8),
-      (pa.float32(), 4),
-      (pa.float64(), 8),
-  ]:
-    result.append(
-        dict(
-            testcase_name=str(array_type),
-            array=pa.array(range(9), type=array_type),
-            slice_offset=2,
-            slice_length=3,
-            expected_size=(_all_false_null_bitmap_size(2) + sizeof * 9),
-            expected_sliced_size=(_all_false_null_bitmap_size(1) + sizeof * 3)))
-  return result
+    result = []
+    for array_type, sizeof in [
+        (pa.int8(), 1),
+        (pa.uint8(), 1),
+        (pa.int16(), 2),
+        (pa.uint16(), 2),
+        (pa.int32(), 4),
+        (pa.uint32(), 4),
+        (pa.int64(), 8),
+        (pa.uint64(), 8),
+        (pa.float32(), 4),
+        (pa.float64(), 8),
+    ]:
+        result.append(
+            dict(
+                testcase_name=str(array_type),
+                array=pa.array(range(9), type=array_type),
+                slice_offset=2,
+                slice_length=3,
+                expected_size=(_all_false_null_bitmap_size(2) + sizeof * 9),
+                expected_sliced_size=(_all_false_null_bitmap_size(1) + sizeof * 3),
+            )
+        )
+    return result
 
 
 def _get_binary_like_byte_size_test_cases():
-  result = []
-  for array_type, sizeof_offsets in [
-      (pa.binary(), 4),
-      (pa.string(), 4),
-      (pa.large_binary(), 8),
-      (pa.large_string(), 8),
-  ]:
-    result.append(
-        dict(
-            testcase_name=str(array_type),
-            array=pa.array([
-                "a", "bb", "ccc", "dddd", "eeeee", "ffffff", "ggggggg",
-                "hhhhhhhh", "iiiiiiiii"
-            ],
-                           type=array_type),
-            slice_offset=1,
-            slice_length=3,
-            # contents: 45
-            # offsets: 10 * sizeof_offsets
-            # null bitmap: 2
-            expected_size=(45 + sizeof_offsets * 10 +
-                           _all_false_null_bitmap_size(2)),
-            # contents: 9
-            # offsets: 4 * sizeof_offsets
-            # null bitmap: 1
-            expected_sliced_size=(9 + sizeof_offsets * 4 +
-                                  _all_false_null_bitmap_size(1))))
-  return result
+    result = []
+    for array_type, sizeof_offsets in [
+        (pa.binary(), 4),
+        (pa.string(), 4),
+        (pa.large_binary(), 8),
+        (pa.large_string(), 8),
+    ]:
+        result.append(
+            dict(
+                testcase_name=str(array_type),
+                array=pa.array(
+                    [
+                        "a",
+                        "bb",
+                        "ccc",
+                        "dddd",
+                        "eeeee",
+                        "ffffff",
+                        "ggggggg",
+                        "hhhhhhhh",
+                        "iiiiiiiii",
+                    ],
+                    type=array_type,
+                ),
+                slice_offset=1,
+                slice_length=3,
+                # contents: 45
+                # offsets: 10 * sizeof_offsets
+                # null bitmap: 2
+                expected_size=(
+                    45 + sizeof_offsets * 10 + _all_false_null_bitmap_size(2)
+                ),
+                # contents: 9
+                # offsets: 4 * sizeof_offsets
+                # null bitmap: 1
+                expected_sliced_size=(
+                    9 + sizeof_offsets * 4 + _all_false_null_bitmap_size(1)
+                ),
+            )
+        )
+    return result
 
 
 _GET_BYTE_SIZE_TEST_CASES = (
-    _get_numeric_byte_size_test_cases() +
-    _get_binary_like_byte_size_test_cases() + [
+    _get_numeric_byte_size_test_cases()
+    + _get_binary_like_byte_size_test_cases()
+    + [
         dict(
             testcase_name="bool",
             array=pa.array([False] * 9, type=pa.bool_()),
@@ -583,11 +634,13 @@ def _get_binary_like_byte_size_test_cases():
             expected_size=(_all_false_null_bitmap_size(2) + 2),
             # contents: 1
             # null bitmap: 1
-            expected_sliced_size=(_all_false_null_bitmap_size(1) + 1)),
+            expected_sliced_size=(_all_false_null_bitmap_size(1) + 1),
+        ),
         dict(
             testcase_name="list",
-            array=pa.array([[1], [1, 1], [1, 1, 1], [1, 1, 1, 1]],
-                           type=pa.list_(pa.int64())),
+            array=pa.array(
+                [[1], [1, 1], [1, 1, 1], [1, 1, 1, 1]], type=pa.list_(pa.int64())
+            ),
             slice_offset=1,
             slice_length=2,
             # offsets: 5 * 4
@@ -601,12 +654,13 @@ def _get_binary_like_byte_size_test_cases():
             # contents:
             #   null bitmap: 1
             #   contents: 5 * 8
-            expected_sliced_size=(3 * 4 + _all_false_null_bitmap_size(1 + 1)
-                                  + 5 * 8)),
+            expected_sliced_size=(3 * 4 + _all_false_null_bitmap_size(1 + 1) + 5 * 8),
+        ),
         dict(
             testcase_name="large_list",
-            array=pa.array([[1], [1, 1], [1, 1, 1], [1, 1, 1, 1]],
-                           type=pa.large_list(pa.int64())),
+            array=pa.array(
+                [[1], [1, 1], [1, 1, 1], [1, 1, 1, 1]], type=pa.large_list(pa.int64())
+            ),
             slice_offset=1,
             slice_length=2,
             # offsets: 5 * 8
@@ -620,72 +674,80 @@ def _get_binary_like_byte_size_test_cases():
             # contents:
             #   null bitmap: 1
             #   contents: 5 * 8
-            expected_sliced_size=(
-                3 * 8 + _all_false_null_bitmap_size(1 + 1) + 5 * 8)),
+            expected_sliced_size=(3 * 8 + _all_false_null_bitmap_size(1 + 1) + 5 * 8),
+        ),
         dict(
             testcase_name="deeply_nested_list",
-            array=pa.array([[["aaa"], ["bb", ""], None],
-                            None,
-                            [["c"], [], ["def", "g"]],
-                            [["h"]]],
-                           type=pa.list_(pa.list_(pa.binary()))),
+            array=pa.array(
+                [[["aaa"], ["bb", ""], None], None, [["c"], [], ["def", "g"]], [["h"]]],
+                type=pa.list_(pa.list_(pa.binary())),
+            ),
             slice_offset=1,
             slice_length=2,
             # innermost binary array: 1 + 11 + 8 * 4
             # mid list array: 1 + 8 * 4
             # outmost list array: 1 + 5 * 4
-            expected_size=(97 +
-                           # innermost binary array does not have null
-                           _all_false_null_bitmap_size(1)),
+            expected_size=(
+                97
+                +
+                # innermost binary array does not have null
+                _all_false_null_bitmap_size(1)
+            ),
             # innermost binary array (["c", "def", "g"]): 1 + 5 + 4 * 4
             # mid list array: ([["c"], [], ["def, "g]]): 1 + 4 * 4
             # outmost list array: 1 + 3 * 4
             expected_sliced_size=(
-                51 +
+                51
+                +
                 # innermost binary array does not have null
-                _all_false_null_bitmap_size(1))),
+                _all_false_null_bitmap_size(1)
+            ),
+        ),
         dict(
             testcase_name="null",
             array=pa.array([None] * 1000),
             slice_offset=4,
             slice_length=100,
             expected_size=0,
-            expected_sliced_size=0),
+            expected_sliced_size=0,
+        ),
         dict(
             testcase_name="struct",
             array=pa.array(
-                [{
-                    "a": 1,
-                    "b": 2
-                }] * 10,
-                type=pa.struct(
-                    [pa.field("a", pa.int64()),
-                     pa.field("b", pa.int64())])),
+                [{"a": 1, "b": 2}] * 10,
+                type=pa.struct([pa.field("a", pa.int64()), pa.field("b", pa.int64())]),
+            ),
             slice_offset=2,
             slice_length=1,
-            expected_size=(_all_false_null_bitmap_size(2) +
-                           (_all_false_null_bitmap_size(2) + 10 * 8) * 2),
-            expected_sliced_size=(_all_false_null_bitmap_size(1) +
-                                  (_all_false_null_bitmap_size(1) + 8) * 2))
-    ])
+            expected_size=(
+                _all_false_null_bitmap_size(2)
+                + (_all_false_null_bitmap_size(2) + 10 * 8) * 2
+            ),
+            expected_sliced_size=(
+                _all_false_null_bitmap_size(1)
+                + (_all_false_null_bitmap_size(1) + 8) * 2
+            ),
+        ),
+    ]
+)
 
 
 class GetByteSizeTest(parameterized.TestCase):
+    @parameterized.named_parameters(*_GET_BYTE_SIZE_TEST_CASES)
+    def testGetByteSize(
+        self, array, slice_offset, slice_length, expected_size, expected_sliced_size
+    ):
+        # make sure the empty array case does not crash.
+        array_util.GetByteSize(pa.array([], array.type))
 
-  @parameterized.named_parameters(*_GET_BYTE_SIZE_TEST_CASES)
-  def testGetByteSize(self, array, slice_offset, slice_length, expected_size,
-                      expected_sliced_size):
-    # make sure the empty array case does not crash.
-    array_util.GetByteSize(pa.array([], array.type))
-
-    self.assertEqual(array_util.GetByteSize(array), expected_size)
+        self.assertEqual(array_util.GetByteSize(array), expected_size)
 
-    sliced = array.slice(slice_offset, slice_length)
-    self.assertEqual(array_util.GetByteSize(sliced), expected_sliced_size)
+        sliced = array.slice(slice_offset, slice_length)
+        self.assertEqual(array_util.GetByteSize(sliced), expected_sliced_size)
 
-  def testUnsupported(self):
-    with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
-      array_util.GetByteSize(pa.array([], type=pa.timestamp("s")))
+    def testUnsupported(self):
+        with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED"):
+            array_util.GetByteSize(pa.array([], type=pa.timestamp("s")))
 
 
 _TO_SINGLETON_LIST_ARRAY_TEST_CASES = [
@@ -697,33 +759,33 @@ def testUnsupported(self):
     dict(
         testcase_name="no_null",
         array=pa.array([1, 2, 3]),
-        expected_result=pa.array([[1], [2], [3]],
-                                 type=pa.large_list(pa.int64())),
+        expected_result=pa.array([[1], [2], [3]], type=pa.large_list(pa.int64())),
     ),
     dict(
         testcase_name="all_nulls",
         array=pa.array([None, None, None], type=pa.binary()),
-        expected_result=pa.array([None, None, None],
-                                 type=pa.large_list(pa.binary())),
+        expected_result=pa.array([None, None, None], type=pa.large_list(pa.binary())),
     ),
     dict(
         testcase_name="some_nulls",
         array=pa.array([None, None, 2, 3, None, 4, None, None]),
-        expected_result=pa.array([None, None, [2], [3], None, [4], None, None],
-                                 type=pa.large_list(pa.int64())),
+        expected_result=pa.array(
+            [None, None, [2], [3], None, [4], None, None],
+            type=pa.large_list(pa.int64()),
+        ),
     ),
 ]
 
 
 class ToSingletonListArrayTest(parameterized.TestCase):
-
-  @parameterized.named_parameters(*_TO_SINGLETON_LIST_ARRAY_TEST_CASES)
-  def testToSingletonListArray(self, array, expected_result):
-    result = array_util.ToSingletonListArray(array)
-    result.validate()
-    self.assertTrue(
-        result.equals(expected_result),
-        "expected: {}; got: {}".format(expected_result, result))
+    @parameterized.named_parameters(*_TO_SINGLETON_LIST_ARRAY_TEST_CASES)
+    def testToSingletonListArray(self, array, expected_result):
+        result = array_util.ToSingletonListArray(array)
+        result.validate()
+        self.assertTrue(
+            result.equals(expected_result),
+            f"expected: {expected_result}; got: {result}",
+        )
 
 
 _COUNT_INVALID_UTF8_TEST_CASES = [
@@ -744,8 +806,7 @@ def testToSingletonListArray(self, array, expected_result):
     ),
     dict(
         testcase_name="some_valid_binary_array",
-        array=pa.array([b"a", b"b", b"\xfc\xa1\xa1\xa1\xa1\xa1"],
-                       type="binary"),
+        array=pa.array([b"a", b"b", b"\xfc\xa1\xa1\xa1\xa1\xa1"], type="binary"),
         expected_count=1,
     ),
     dict(
@@ -757,16 +818,15 @@ def testToSingletonListArray(self, array, expected_result):
 
 
 class CountInvalidUtf8(parameterized.TestCase):
-
-  @parameterized.named_parameters(*_COUNT_INVALID_UTF8_TEST_CASES)
-  def test_count_utf8(self, array, expected_count=None, expected_error=None):
-    if expected_error:
-      with self.assertRaisesRegex(RuntimeError, expected_error):
-        array_util.CountInvalidUTF8(array)
-    else:
-      count = array_util.CountInvalidUTF8(array)
-      self.assertEqual(expected_count, count)
+    @parameterized.named_parameters(*_COUNT_INVALID_UTF8_TEST_CASES)
+    def test_count_utf8(self, array, expected_count=None, expected_error=None):
+        if expected_error:
+            with self.assertRaisesRegex(RuntimeError, expected_error):
+                array_util.CountInvalidUTF8(array)
+        else:
+            count = array_util.CountInvalidUTF8(array)
+            self.assertEqual(expected_count, count)
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/arrow/path.py b/tfx_bsl/arrow/path.py
index ef1052fd..2d6f5e05 100644
--- a/tfx_bsl/arrow/path.py
+++ b/tfx_bsl/arrow/path.py
@@ -18,120 +18,130 @@
 from tensorflow_metadata.proto.v0 import path_pb2
 
 
-class ColumnPath(object):
-  """ColumnPath addresses a column potentially nested under a StructArray."""
+class ColumnPath:
+    """ColumnPath addresses a column potentially nested under a StructArray."""
+
+    __slot__ = ["_steps"]
+
+    def __init__(self, steps: Union[Iterable[str], str]):
+        """If a single Step is specified, constructs a Path of that step."""
+        if isinstance(steps, str):
+            steps = (steps,)
+        self._steps = tuple(steps)
+
+    def to_proto(self) -> path_pb2.Path:
+        """Creates a tensorflow_metadata path proto this ColumnPath."""
+        return path_pb2.Path(step=self._steps)
 
-  __slot__ = ["_steps"]
+    @staticmethod
+    def from_proto(path_proto: path_pb2.Path):
+        """Creates a ColumnPath from a tensorflow_metadata path proto.
 
-  def __init__(self, steps: Union[Iterable[Text], Text]):
-    """If a single Step is specified, constructs a Path of that step."""
-    if isinstance(steps, Text):
-      steps = (steps,)
-    self._steps = tuple(steps)
+        Args:
+        ----
+          path_proto: a tensorflow_metadata path proto.
 
-  def to_proto(self) -> path_pb2.Path:
-    """Creates a tensorflow_metadata path proto this ColumnPath."""
-    return path_pb2.Path(step=self._steps)
+        Returns:
+        -------
+          A ColumnPath representing the path proto's steps.
+        """
+        return ColumnPath(path_proto.step)
 
-  @staticmethod
-  def from_proto(path_proto: path_pb2.Path):
-    """Creates a ColumnPath from a tensorflow_metadata path proto.
+    def steps(self) -> Tuple[str, ...]:
+        """Returns the tuple of steps that represents this ColumnPath."""
+        return self._steps
 
-    Args:
-      path_proto: a tensorflow_metadata path proto.
+    def parent(self) -> "ColumnPath":
+        """Gets the parent path of the current ColumnPath.
 
-    Returns:
-      A ColumnPath representing the path proto's steps.
-    """
-    return ColumnPath(path_proto.step)
+        example: ColumnPath(["this", "is", "my", "path"]).parent() will
+        return a ColumnPath representing "this.is.my".
 
-  def steps(self) -> Tuple[Text, ...]:
-    """Returns the tuple of steps that represents this ColumnPath."""
-    return self._steps
+        Returns
+        -------
+          A ColumnPath with the leaf step removed.
+        """
+        if not self._steps:
+            raise ValueError("Root does not have parent.")
+        return ColumnPath(self._steps[:-1])
 
-  def parent(self) -> "ColumnPath":
-    """Gets the parent path of the current ColumnPath.
+    def child(self, child_step: str) -> "ColumnPath":
+        """Creates a new ColumnPath with a new child.
 
-    example: ColumnPath(["this", "is", "my", "path"]).parent() will
-    return a ColumnPath representing "this.is.my".
+        example: ColumnPath(["this", "is", "my", "path"]).child("new_step") will
+        return a ColumnPath representing "this.is.my.path.new_step".
 
-    Returns:
-      A ColumnPath with the leaf step removed.
-    """
-    if not self._steps:
-      raise ValueError("Root does not have parent.")
-    return ColumnPath(self._steps[:-1])
+        Args:
+        ----
+          child_step: name of the new child step to append.
 
-  def child(self, child_step: Text) -> "ColumnPath":
-    """Creates a new ColumnPath with a new child.
+        Returns:
+        -------
+          A ColumnPath with the new child_step
+        """
+        return ColumnPath(self._steps + (child_step,))
 
-    example: ColumnPath(["this", "is", "my", "path"]).child("new_step") will
-    return a ColumnPath representing "this.is.my.path.new_step".
+    def prefix(self, ending_index: int) -> "ColumnPath":
+        """Creates a new ColumnPath, taking the prefix until the ending_index.
 
-    Args:
-      child_step: name of the new child step to append.
+        example: ColumnPath(["this", "is", "my", "path"]).prefix(1) will return a
+        ColumnPath representing "this.is.my".
 
-    Returns:
-      A ColumnPath with the new child_step
-    """
-    return ColumnPath(self._steps + (child_step,))
+        Args:
+        ----
+          ending_index: where to end the prefix.
 
-  def prefix(self, ending_index: int) -> "ColumnPath":
-    """Creates a new ColumnPath, taking the prefix until the ending_index.
+        Returns:
+        -------
+          A ColumnPath representing the prefix of this ColumnPath.
+        """
+        return ColumnPath(self._steps[:ending_index])
 
-    example: ColumnPath(["this", "is", "my", "path"]).prefix(1) will return a
-    ColumnPath representing "this.is.my".
+    def suffix(self, starting_index: int) -> "ColumnPath":
+        """Creates a new ColumnPath, taking the suffix from the starting_index.
 
-    Args:
-      ending_index: where to end the prefix.
+        example: ColumnPath(["this", "is", "my", "path"]).suffix(1) will return a
+        ColumnPath representing "is.my.path".
 
-    Returns:
-      A ColumnPath representing the prefix of this ColumnPath.
-    """
-    return ColumnPath(self._steps[:ending_index])
+        Args:
+        ----
+          starting_index: where to start the suffix.
 
-  def suffix(self, starting_index: int) -> "ColumnPath":
-    """Creates a new ColumnPath, taking the suffix from the starting_index.
+        Returns:
+        -------
+          A ColumnPath representing the suffix of this ColumnPath.
+        """
+        return ColumnPath(self._steps[starting_index:])
 
-    example: ColumnPath(["this", "is", "my", "path"]).suffix(1) will return a
-    ColumnPath representing "is.my.path".
+    def initial_step(self) -> str:
+        """Returns the first step of this path.
 
-    Args:
-      starting_index: where to start the suffix.
+        Raises
+        ------
+          ValueError: if the path is empty.
+        """
+        if not self._steps:
+            raise ValueError("This ColumnPath does not have any steps.")
+        return self._steps[0]
+
+    def __str__(self) -> str:
+        return ".".join(self._steps)
 
-    Returns:
-      A ColumnPath representing the suffix of this ColumnPath.
-    """
-    return ColumnPath(self._steps[starting_index:])
+    def __repr__(self) -> str:
+        return self.__str__()
+
+    def __eq__(self, other) -> bool:
+        return self._steps == other._steps  # pylint: disable=protected-access
 
-  def initial_step(self) -> Text:
-    """Returns the first step of this path.
+    def __lt__(self, other) -> bool:
+        # lexicographic order.
+        return self._steps < other._steps  # pylint: disable=protected-access
 
-    Raises:
-      ValueError: if the path is empty.
-    """
-    if not self._steps:
-      raise ValueError("This ColumnPath does not have any steps.")
-    return self._steps[0]
+    def __hash__(self) -> int:
+        return hash(self._steps)
 
-  def __str__(self) -> Text:
-    return u".".join(self._steps)
+    def __len__(self) -> int:
+        return len(self._steps)
 
-  def __repr__(self) -> Text:
-    return self.__str__()
-
-  def __eq__(self, other) -> bool:
-    return self._steps == other._steps  # pylint: disable=protected-access
-
-  def __lt__(self, other) -> bool:
-    # lexicographic order.
-    return self._steps < other._steps  # pylint: disable=protected-access
-
-  def __hash__(self) -> int:
-    return hash(self._steps)
-
-  def __len__(self) -> int:
-    return len(self._steps)
-
-  def __bool__(self) -> bool:
-    return bool(self._steps)
+    def __bool__(self) -> bool:
+        return bool(self._steps)
diff --git a/tfx_bsl/arrow/table_util.py b/tfx_bsl/arrow/table_util.py
index a11987ab..d803625a 100644
--- a/tfx_bsl/arrow/table_util.py
+++ b/tfx_bsl/arrow/table_util.py
@@ -18,21 +18,26 @@
 
 import numpy as np
 import pyarrow as pa
-from tfx_bsl.arrow import array_util
-from tfx_bsl.arrow import path
+
+from tfx_bsl.arrow import array_util, path
 
 # pytype: disable=import-error
 # pylint: disable=unused-import
 # pylint: disable=g-import-not-at-top
 # See b/148667210 for why the ImportError is ignored.
 try:
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.table_util import RecordBatchTake
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.table_util import MergeRecordBatches as _MergeRecordBatches
-  from tfx_bsl.cc.tfx_bsl_extension.arrow.table_util import TotalByteSize as _TotalByteSize
+    from tfx_bsl.cc.tfx_bsl_extension.arrow.table_util import (
+        MergeRecordBatches as _MergeRecordBatches,
+    )
+    from tfx_bsl.cc.tfx_bsl_extension.arrow.table_util import RecordBatchTake
+    from tfx_bsl.cc.tfx_bsl_extension.arrow.table_util import (
+        TotalByteSize as _TotalByteSize,
+    )
 except ImportError as err:
-  sys.stderr.write("Error importing tfx_bsl_extension.arrow.table_util. "
-                   "Some tfx_bsl functionalities are not available: {}"
-                   .format(err))
+    sys.stderr.write(
+        "Error importing tfx_bsl_extension.arrow.table_util. "
+        f"Some tfx_bsl functionalities are not available: {err}"
+    )
 # pylint: enable=g-import-not-at-top
 # pytype: enable=import-error
 # pylint: enable=unused-import
@@ -51,244 +56,264 @@
 }
 
 
-def TotalByteSize(table_or_batch: Union[pa.Table, pa.RecordBatch],
-                  ignore_unsupported=False):
-  """Returns the in-memory size of a record batch or a table."""
-  if isinstance(table_or_batch, pa.Table):
-    return sum([
-        _TotalByteSize(b, ignore_unsupported)
-        for b in table_or_batch.to_batches(max_chunksize=None)
-    ])
-  else:
-    return _TotalByteSize(table_or_batch, ignore_unsupported)
+def TotalByteSize(
+    table_or_batch: Union[pa.Table, pa.RecordBatch], ignore_unsupported=False
+):
+    """Returns the in-memory size of a record batch or a table."""
+    if isinstance(table_or_batch, pa.Table):
+        return sum(
+            [
+                _TotalByteSize(b, ignore_unsupported)
+                for b in table_or_batch.to_batches(max_chunksize=None)
+            ]
+        )
+    else:
+        return _TotalByteSize(table_or_batch, ignore_unsupported)
 
 
 def NumpyKindToArrowType(kind: str) -> Optional[pa.DataType]:
-  return _NUMPY_KIND_TO_ARROW_TYPE.get(kind)
+    return _NUMPY_KIND_TO_ARROW_TYPE.get(kind)
 
 
 def MergeRecordBatches(record_batches: List[pa.RecordBatch]) -> pa.RecordBatch:
-  """Merges a list of arrow RecordBatches into one. Similar to MergeTables."""
-  if not record_batches:
-    return _EMPTY_RECORD_BATCH
-  first_schema = record_batches[0].schema
-  assert any([r.num_rows > 0 for r in record_batches]), (
-      "Unable to merge empty RecordBatches.")
-  if (all([r.schema.equals(first_schema) for r in record_batches[1:]])
-      # combine_chunks() cannot correctly handle the case where there are
-      # 0 column. (ARROW-11232)
-      and first_schema):
-    one_chunk_table = pa.Table.from_batches(record_batches).combine_chunks()
-    batches = one_chunk_table.to_batches(max_chunksize=None)
-    assert len(batches) == 1
-    return batches[0]
-  else:
-    # Our implementation of _MergeRecordBatches is different than
-    # pa.Table.concat_tables(
-    #     [pa.Table.from_batches([rb]) for rb in record_batches],
-    #     promote=True).combine_chunks().to_batches()[0]
-    # in its handling of struct-typed columns -- if two record batches have a
-    # column of the same name but of different struct types, _MergeRecordBatches
-    # will try merging (recursively) those struct types while concat_tables
-    # will not. We should consider upstreaming our implementation because it's a
-    # generalization
-    return _MergeRecordBatches(record_batches)
+    """Merges a list of arrow RecordBatches into one. Similar to MergeTables."""
+    if not record_batches:
+        return _EMPTY_RECORD_BATCH
+    first_schema = record_batches[0].schema
+    assert any(
+        [r.num_rows > 0 for r in record_batches]
+    ), "Unable to merge empty RecordBatches."
+    if (
+        all([r.schema.equals(first_schema) for r in record_batches[1:]])
+        # combine_chunks() cannot correctly handle the case where there are
+        # 0 column. (ARROW-11232)
+        and first_schema
+    ):
+        one_chunk_table = pa.Table.from_batches(record_batches).combine_chunks()
+        batches = one_chunk_table.to_batches(max_chunksize=None)
+        assert len(batches) == 1
+        return batches[0]
+    else:
+        # Our implementation of _MergeRecordBatches is different than
+        # pa.Table.concat_tables(
+        #     [pa.Table.from_batches([rb]) for rb in record_batches],
+        #     promote=True).combine_chunks().to_batches()[0]
+        # in its handling of struct-typed columns -- if two record batches have a
+        # column of the same name but of different struct types, _MergeRecordBatches
+        # will try merging (recursively) those struct types while concat_tables
+        # will not. We should consider upstreaming our implementation because it's a
+        # generalization
+        return _MergeRecordBatches(record_batches)
 
 
 def _CanonicalizeType(arrow_type: pa.DataType) -> pa.DataType:
-  """Returns canonical version of the given type."""
-  if pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type):
-    return pa.large_list(_CanonicalizeType(arrow_type.value_type))
-  else:
-    result = NumpyKindToArrowType(np.dtype(arrow_type.to_pandas_dtype()).kind)
-    if result is None:
-      raise NotImplementedError(f"Type {arrow_type} is not supported.")
-    return result
+    """Returns canonical version of the given type."""
+    if pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type):
+        return pa.large_list(_CanonicalizeType(arrow_type.value_type))
+    else:
+        result = NumpyKindToArrowType(np.dtype(arrow_type.to_pandas_dtype()).kind)
+        if result is None:
+            raise NotImplementedError(f"Type {arrow_type} is not supported.")
+        return result
 
 
 def CanonicalizeRecordBatch(
-    record_batch_with_primitive_arrays: pa.RecordBatch) -> pa.RecordBatch:
-  """Converts primitive arrays in a pyarrow.RecordBatch to LargeListArrays.
-
-  The produced LargeListArrays' elements are lists that contain single element
-  of the array of the canonical pyarrow type.
-
-  Args:
-    record_batch_with_primitive_arrays: A pyarrow.RecordBatch where values are
-      stored in primitive arrays or list arrays.
-
-  Returns:
-    pyArrow.RecordBatch with LargeListArray columns.
-  """
-  arrays = []
-  for column_array in record_batch_with_primitive_arrays.columns:
-    canonical_type = _CanonicalizeType(column_array.type)
-    if canonical_type != column_array.type:
-      column_array = column_array.cast(canonical_type)
-    if pa.types.is_large_list(canonical_type):
-      arrays.append(column_array)
-    else:
-      arrays.append(array_util.ToSingletonListArray(column_array))
-  return pa.RecordBatch.from_arrays(
-      arrays, record_batch_with_primitive_arrays.schema.names)
+    record_batch_with_primitive_arrays: pa.RecordBatch,
+) -> pa.RecordBatch:
+    """Converts primitive arrays in a pyarrow.RecordBatch to LargeListArrays.
+
+    The produced LargeListArrays' elements are lists that contain single element
+    of the array of the canonical pyarrow type.
+
+    Args:
+    ----
+      record_batch_with_primitive_arrays: A pyarrow.RecordBatch where values are
+        stored in primitive arrays or list arrays.
+
+    Returns:
+    -------
+      pyArrow.RecordBatch with LargeListArray columns.
+    """
+    arrays = []
+    for column_array in record_batch_with_primitive_arrays.columns:
+        canonical_type = _CanonicalizeType(column_array.type)
+        if canonical_type != column_array.type:
+            column_array = column_array.cast(canonical_type)
+        if pa.types.is_large_list(canonical_type):
+            arrays.append(column_array)
+        else:
+            arrays.append(array_util.ToSingletonListArray(column_array))
+    return pa.RecordBatch.from_arrays(
+        arrays, record_batch_with_primitive_arrays.schema.names
+    )
 
 
 def enumerate_arrays(  # pylint: disable=invalid-name
     record_batch: pa.RecordBatch,
     enumerate_leaves_only: bool,
-    wrap_flat_struct_in_list: bool = True
+    wrap_flat_struct_in_list: bool = True,
 ) -> Iterable[Tuple[path.ColumnPath, pa.Array]]:
-  """Enumerates arrays in a RecordBatch.
-
-  Define:
-    primitive: primitive arrow arrays (e.g. Int64Array).
-    nested_list := list<nested_list> | list<primitive> | null
-    # note: a null array can be seen as a list<primitive>, which contains only
-    #   nulls and the type of the primitive is unknown.
-    # example:
-    #   null,
-    #   list<null>,  # like list<list<unknown_type>> with only null values.
-    #   list<list<int64>>,
-    struct := struct<{field: nested_list | struct}> | list<struct>
-    # example:
-    #   struct<{"foo": list<int64>},
-    #   list<struct<{"foo": list<int64>}>>,
-    #   struct<{"foo": struct<{"bar": list<list<int64>>}>}>
-
-  This function assumes `record_batch` contains only nested_list and struct
-  columns. It enumerates each column in `record_batch`, and if that column is
-  a struct, it flattens the outer lists wrapping it (if any), and recursively
-  enumerates the array of each field in the struct (also see
-  `enumerate_leaves_only`).
-
-  A ColumnPath is included in the result to address the enumerated array.
-  Note that the ColumnPath merely addresses in the `record_batch` and struct
-  arrays. It does not indicate whether / how a struct array is nested.
-
-  Args:
-    record_batch: The RecordBatch whose arrays to be visited.
-    enumerate_leaves_only: If True, only enumerate leaf arrays. A leaf array
-      is an array whose type does not have any struct nested in.
-      Otherwise, also enumerate the struct arrays where the leaf arrays are
-      contained.
-    wrap_flat_struct_in_list: if True, and if a struct<[Ts]> array is
-      encountered, it will be wrapped in a list array, so it becomes a
-      list<struct<[Ts]>>, in which each sub-list contains one element.
-      A caller can make use of this option to assume all the arrays enumerated
-      here are list<inner_type>.
-  Yields:
-    A tuple. The first term is the path of the feature, and the second term is
-    the feature array.
-  """
-
-  def _recursion_helper(   # pylint: disable=invalid-name
-      feature_path: path.ColumnPath, array: pa.Array,
-  ) -> Iterable[Tuple[path.ColumnPath, pa.Array]]:
-    """Recursion helper."""
-    array_type = array.type
-    innermost_nested_type = array_util.get_innermost_nested_type(array_type)
-    if pa.types.is_struct(innermost_nested_type):
-      if not enumerate_leaves_only:
-        # special handing for a flat struct array -- wrap it in a ListArray
-        # whose elements are singleton lists. This way downstream can keep
-        # assuming the enumerated arrays are list<*>.
-        to_yield = array
-        if pa.types.is_struct(array_type) and wrap_flat_struct_in_list:
-          to_yield = array_util.ToSingletonListArray(array)
-        yield (feature_path, to_yield)
-      flat_struct_array, _ = array_util.flatten_nested(array)
-      for field in flat_struct_array.type:
-        field_name = field.name
-        yield from _recursion_helper(
-            feature_path.child(field_name),
-            array_util.get_field(flat_struct_array, field_name))
-    else:
-      yield (feature_path, array)
-
-  for column_name, column in zip(record_batch.schema.names,
-                                 record_batch.columns):
-    yield from _recursion_helper(
-        path.ColumnPath([column_name]), column)
-
-
-def get_array(   # pylint: disable=invalid-name
+    """Enumerates arrays in a RecordBatch.
+
+    Define:
+      primitive: primitive arrow arrays (e.g. Int64Array).
+      nested_list := list<nested_list> | list<primitive> | null
+      # note: a null array can be seen as a list<primitive>, which contains only
+      #   nulls and the type of the primitive is unknown.
+      # example:
+      #   null,
+      #   list<null>,  # like list<list<unknown_type>> with only null values.
+      #   list<list<int64>>,
+      struct := struct<{field: nested_list | struct}> | list<struct>
+      # example:
+      #   struct<{"foo": list<int64>},
+      #   list<struct<{"foo": list<int64>}>>,
+      #   struct<{"foo": struct<{"bar": list<list<int64>>}>}>
+
+    This function assumes `record_batch` contains only nested_list and struct
+    columns. It enumerates each column in `record_batch`, and if that column is
+    a struct, it flattens the outer lists wrapping it (if any), and recursively
+    enumerates the array of each field in the struct (also see
+    `enumerate_leaves_only`).
+
+    A ColumnPath is included in the result to address the enumerated array.
+    Note that the ColumnPath merely addresses in the `record_batch` and struct
+    arrays. It does not indicate whether / how a struct array is nested.
+
+    Args:
+    ----
+      record_batch: The RecordBatch whose arrays to be visited.
+      enumerate_leaves_only: If True, only enumerate leaf arrays. A leaf array
+        is an array whose type does not have any struct nested in.
+        Otherwise, also enumerate the struct arrays where the leaf arrays are
+        contained.
+      wrap_flat_struct_in_list: if True, and if a struct<[Ts]> array is
+        encountered, it will be wrapped in a list array, so it becomes a
+        list<struct<[Ts]>>, in which each sub-list contains one element.
+        A caller can make use of this option to assume all the arrays enumerated
+        here are list<inner_type>.
+
+    Yields:
+    ------
+      A tuple. The first term is the path of the feature, and the second term is
+      the feature array.
+    """
+
+    def _recursion_helper(  # pylint: disable=invalid-name
+        feature_path: path.ColumnPath,
+        array: pa.Array,
+    ) -> Iterable[Tuple[path.ColumnPath, pa.Array]]:
+        """Recursion helper."""
+        array_type = array.type
+        innermost_nested_type = array_util.get_innermost_nested_type(array_type)
+        if pa.types.is_struct(innermost_nested_type):
+            if not enumerate_leaves_only:
+                # special handing for a flat struct array -- wrap it in a ListArray
+                # whose elements are singleton lists. This way downstream can keep
+                # assuming the enumerated arrays are list<*>.
+                to_yield = array
+                if pa.types.is_struct(array_type) and wrap_flat_struct_in_list:
+                    to_yield = array_util.ToSingletonListArray(array)
+                yield (feature_path, to_yield)
+            flat_struct_array, _ = array_util.flatten_nested(array)
+            for field in flat_struct_array.type:
+                field_name = field.name
+                yield from _recursion_helper(
+                    feature_path.child(field_name),
+                    array_util.get_field(flat_struct_array, field_name),
+                )
+        else:
+            yield (feature_path, array)
+
+    for column_name, column in zip(record_batch.schema.names, record_batch.columns):
+        yield from _recursion_helper(path.ColumnPath([column_name]), column)
+
+
+def get_array(  # pylint: disable=invalid-name
     record_batch: pa.RecordBatch,
     query_path: path.ColumnPath,
     return_example_indices: bool,
     wrap_flat_struct_in_list: bool = True,
 ) -> Tuple[pa.Array, Optional[np.ndarray]]:
-  """Retrieve a nested array (and optionally example indices) from RecordBatch.
-
-  This function has the same assumption over `record_batch` as
-  `enumerate_arrays()` does.
-
-  If the provided path refers to a leaf in the `record_batch`, then a
-  "nested_list" will be returned. If the provided path does not refer to a leaf,
-  a "struct" will be returned.
-
-  See `enumerate_arrays()` for definition of "nested_list" and "struct".
-
-  Args:
-    record_batch: The RecordBatch whose arrays to be visited.
-    query_path: The ColumnPath to lookup in the record_batch.
-    return_example_indices: Whether to return an additional array containing the
-      example indices of the elements in the array corresponding to the
-      query_path.
-    wrap_flat_struct_in_list: if True, and if the query_path leads to a
-      struct<[Ts]> array, it will be wrapped in a list array, where each
-      sub-list contains one element. Caller can make use of this option to
-      assume this function always returns a list<inner_type>.
-
-  Returns:
-    A tuple. The first term is the feature array and the second term is the
-    example_indices array for the feature array (i.e. array[i] came from the
-    example at row example_indices[i] in the record_batch.).
-
-  Raises:
-    KeyError: When the query_path is empty, or cannot be found in the
-    record_batch and its nested struct arrays.
-  """
-
-  def _recursion_helper(   # pylint: disable=invalid-name
-      query_path: path.ColumnPath, array: pa.Array,
-      example_indices: Optional[np.ndarray]
-  ) -> Tuple[pa.Array, Optional[np.ndarray]]:
-    """Recursion helper."""
-    array_type = array.type
+    """Retrieve a nested array (and optionally example indices) from RecordBatch.
+
+    This function has the same assumption over `record_batch` as
+    `enumerate_arrays()` does.
+
+    If the provided path refers to a leaf in the `record_batch`, then a
+    "nested_list" will be returned. If the provided path does not refer to a leaf,
+    a "struct" will be returned.
+
+    See `enumerate_arrays()` for definition of "nested_list" and "struct".
+
+    Args:
+    ----
+      record_batch: The RecordBatch whose arrays to be visited.
+      query_path: The ColumnPath to lookup in the record_batch.
+      return_example_indices: Whether to return an additional array containing the
+        example indices of the elements in the array corresponding to the
+        query_path.
+      wrap_flat_struct_in_list: if True, and if the query_path leads to a
+        struct<[Ts]> array, it will be wrapped in a list array, where each
+        sub-list contains one element. Caller can make use of this option to
+        assume this function always returns a list<inner_type>.
+
+    Returns:
+    -------
+      A tuple. The first term is the feature array and the second term is the
+      example_indices array for the feature array (i.e. array[i] came from the
+      example at row example_indices[i] in the record_batch.).
+
+    Raises:
+    ------
+      KeyError: When the query_path is empty, or cannot be found in the
+      record_batch and its nested struct arrays.
+    """
+
+    def _recursion_helper(  # pylint: disable=invalid-name
+        query_path: path.ColumnPath,
+        array: pa.Array,
+        example_indices: Optional[np.ndarray],
+    ) -> Tuple[pa.Array, Optional[np.ndarray]]:
+        """Recursion helper."""
+        array_type = array.type
+        if not query_path:
+            if pa.types.is_struct(array_type) and wrap_flat_struct_in_list:
+                array = array_util.ToSingletonListArray(array)
+            return array, example_indices
+        if not pa.types.is_struct(array_util.get_innermost_nested_type(array_type)):
+            raise KeyError(
+                f"Cannot process query_path ({query_path}) inside an array of type "
+                f"{array_type}. Expecting a struct<...> or "
+                "(large_)list...<struct<...>>."
+            )
+        flat_struct_array, parent_indices = array_util.flatten_nested(
+            array, example_indices is not None
+        )
+        flat_indices = None
+        if example_indices is not None:
+            flat_indices = example_indices[parent_indices]
+
+        step = query_path.steps()[0]
+
+        try:
+            child_array = array_util.get_field(flat_struct_array, step)
+        except KeyError as exception:
+            raise KeyError(f"query_path step ({step}) not in struct.") from exception
+
+        relative_path = path.ColumnPath(query_path.steps()[1:])
+        return _recursion_helper(relative_path, child_array, flat_indices)
+
     if not query_path:
-      if pa.types.is_struct(array_type) and wrap_flat_struct_in_list:
-        array = array_util.ToSingletonListArray(array)
-      return array, example_indices
-    if not pa.types.is_struct(array_util.get_innermost_nested_type(array_type)):
-      raise KeyError("Cannot process query_path ({}) inside an array of type "
-                     "{}. Expecting a struct<...> or "
-                     "(large_)list...<struct<...>>.".format(
-                         query_path, array_type))
-    flat_struct_array, parent_indices = array_util.flatten_nested(
-        array, example_indices is not None)
-    flat_indices = None
-    if example_indices is not None:
-      flat_indices = example_indices[parent_indices]
-
-    step = query_path.steps()[0]
-
-    try:
-      child_array = array_util.get_field(flat_struct_array, step)
-    except KeyError as exception:
-      raise KeyError(f"query_path step ({step}) not in struct.") from exception
-
-    relative_path = path.ColumnPath(query_path.steps()[1:])
-    return _recursion_helper(relative_path, child_array, flat_indices)
-
-  if not query_path:
-    raise KeyError("query_path must be non-empty.")
-  column_name = query_path.steps()[0]
-  field_index = record_batch.schema.get_field_index(column_name)
-  if field_index < 0:
-    raise KeyError(f"query_path step 0 ({column_name}) not in record batch.")
-  array = record_batch.column(field_index)
-  array_path = path.ColumnPath(query_path.steps()[1:])
-
-  example_indices = np.arange(
-      record_batch.num_rows) if return_example_indices else None
-  return _recursion_helper(array_path, array, example_indices)
+        raise KeyError("query_path must be non-empty.")
+    column_name = query_path.steps()[0]
+    field_index = record_batch.schema.get_field_index(column_name)
+    if field_index < 0:
+        raise KeyError(f"query_path step 0 ({column_name}) not in record batch.")
+    array = record_batch.column(field_index)
+    array_path = path.ColumnPath(query_path.steps()[1:])
+
+    example_indices = (
+        np.arange(record_batch.num_rows) if return_example_indices else None
+    )
+    return _recursion_helper(array_path, array, example_indices)
diff --git a/tfx_bsl/arrow/table_util_test.py b/tfx_bsl/arrow/table_util_test.py
index 24aaf154..53ea9e80 100644
--- a/tfx_bsl/arrow/table_util_test.py
+++ b/tfx_bsl/arrow/table_util_test.py
@@ -15,19 +15,14 @@
 
 import collections
 import itertools
-
 from typing import Dict, Iterable, NamedTuple
 
 import numpy as np
 import pyarrow as pa
 import six
-from tfx_bsl.arrow import array_util
-from tfx_bsl.arrow import path
-from tfx_bsl.arrow import table_util
-
-from absl.testing import absltest
-from absl.testing import parameterized
+from absl.testing import absltest, parameterized
 
+from tfx_bsl.arrow import array_util, path, table_util
 
 _MERGE_TEST_CASES = [
     dict(
@@ -44,14 +39,12 @@
                 "uint64": pa.array([1, None, 3], type=pa.uint64()),
                 "int32": pa.array([1, None, 3], type=pa.int32()),
                 "uint32": pa.array([1, None, 3], type=pa.uint32()),
-                "float": pa.array([1., None, 3.], type=pa.float32()),
-                "double": pa.array([1., None, 3.], type=pa.float64()),
+                "float": pa.array([1.0, None, 3.0], type=pa.float32()),
+                "double": pa.array([1.0, None, 3.0], type=pa.float64()),
                 "bytes": pa.array([b"abc", None, b"ghi"], type=pa.binary()),
-                "large_bytes": pa.array([b"abc", None, b"ghi"],
-                                        type=pa.large_binary()),
-                "unicode": pa.array([u"abc", None, u"ghi"], type=pa.utf8()),
-                "large_unicode": pa.array([u"abc", None, u"ghi"],
-                                          type=pa.large_utf8()),
+                "large_bytes": pa.array([b"abc", None, b"ghi"], type=pa.large_binary()),
+                "unicode": pa.array(["abc", None, "ghi"], type=pa.utf8()),
+                "large_unicode": pa.array(["abc", None, "ghi"], type=pa.large_utf8()),
             },
             {
                 "bool": pa.array([None, False], type=pa.bool_()),
@@ -59,48 +52,39 @@
                 "uint64": pa.array([None, 4], type=pa.uint64()),
                 "int32": pa.array([None, 4], type=pa.int32()),
                 "uint32": pa.array([None, 4], type=pa.uint32()),
-                "float": pa.array([None, 4.], type=pa.float32()),
-                "double": pa.array([None, 4.], type=pa.float64()),
+                "float": pa.array([None, 4.0], type=pa.float32()),
+                "double": pa.array([None, 4.0], type=pa.float64()),
                 "bytes": pa.array([None, b"jkl"], type=pa.binary()),
                 "large_bytes": pa.array([None, b"jkl"], type=pa.large_binary()),
-                "unicode": pa.array([None, u"jkl"], type=pa.utf8()),
-                "large_unicode": pa.array([None, u"jkl"], type=pa.large_utf8()),
+                "unicode": pa.array([None, "jkl"], type=pa.utf8()),
+                "large_unicode": pa.array([None, "jkl"], type=pa.large_utf8()),
             },
         ],
         expected_output={
-            "bool":
-                pa.array([False, None, True, None, False], type=pa.bool_()),
-            "int64":
-                pa.array([1, None, 3, None, 4], type=pa.int64()),
-            "uint64":
-                pa.array([1, None, 3, None, 4], type=pa.uint64()),
-            "int32":
-                pa.array([1, None, 3, None, 4], type=pa.int32()),
-            "uint32":
-                pa.array([1, None, 3, None, 4], type=pa.uint32()),
-            "float":
-                pa.array([1., None, 3., None, 4.], type=pa.float32()),
-            "double":
-                pa.array([1., None, 3., None, 4.], type=pa.float64()),
-            "bytes":
-                pa.array([b"abc", None, b"ghi", None, b"jkl"],
-                         type=pa.binary()),
-            "large_bytes":
-                pa.array([b"abc", None, b"ghi", None, b"jkl"],
-                         type=pa.large_binary()),
-            "unicode":
-                pa.array([u"abc", None, u"ghi", None, u"jkl"],
-                         type=pa.utf8()),
-            "large_unicode":
-                pa.array([u"abc", None, u"ghi", None, u"jkl"],
-                         type=pa.large_utf8()),
-        }),
+            "bool": pa.array([False, None, True, None, False], type=pa.bool_()),
+            "int64": pa.array([1, None, 3, None, 4], type=pa.int64()),
+            "uint64": pa.array([1, None, 3, None, 4], type=pa.uint64()),
+            "int32": pa.array([1, None, 3, None, 4], type=pa.int32()),
+            "uint32": pa.array([1, None, 3, None, 4], type=pa.uint32()),
+            "float": pa.array([1.0, None, 3.0, None, 4.0], type=pa.float32()),
+            "double": pa.array([1.0, None, 3.0, None, 4.0], type=pa.float64()),
+            "bytes": pa.array([b"abc", None, b"ghi", None, b"jkl"], type=pa.binary()),
+            "large_bytes": pa.array(
+                [b"abc", None, b"ghi", None, b"jkl"], type=pa.large_binary()
+            ),
+            "unicode": pa.array(["abc", None, "ghi", None, "jkl"], type=pa.utf8()),
+            "large_unicode": pa.array(
+                ["abc", None, "ghi", None, "jkl"], type=pa.large_utf8()
+            ),
+        },
+    ),
     dict(
         testcase_name="list",
         inputs=[
             {
-                "list<int32>":
-                    pa.array([[1, None, 3], None], type=pa.list_(pa.int32())),
+                "list<int32>": pa.array(
+                    [[1, None, 3], None], type=pa.list_(pa.int32())
+                ),
             },
             {
                 "list<int32>": pa.array([None], type=pa.list_(pa.int32())),
@@ -113,300 +97,325 @@
             },
         ],
         expected_output={
-            "list<int32>":
-                pa.array([[1, None, 3], None, None, []],
-                         type=pa.list_(pa.int32()))
-        }),
+            "list<int32>": pa.array(
+                [[1, None, 3], None, None, []], type=pa.list_(pa.int32())
+            )
+        },
+    ),
     dict(
         testcase_name="large_list",
         inputs=[
             {
-                "large_list<int32>":
-                    pa.array([[1, None, 3], None],
-                             type=pa.large_list(pa.int32())),
+                "large_list<int32>": pa.array(
+                    [[1, None, 3], None], type=pa.large_list(pa.int32())
+                ),
             },
             {
-                "large_list<int32>":
-                    pa.array([None], type=pa.large_list(pa.int32())),
+                "large_list<int32>": pa.array([None], type=pa.large_list(pa.int32())),
             },
             {
-                "large_list<int32>":
-                    pa.array([], type=pa.large_list(pa.int32())),
+                "large_list<int32>": pa.array([], type=pa.large_list(pa.int32())),
             },
             {
-                "large_list<int32>":
-                    pa.array([[]], type=pa.large_list(pa.int32())),
+                "large_list<int32>": pa.array([[]], type=pa.large_list(pa.int32())),
             },
         ],
         expected_output={
-            "large_list<int32>":
-                pa.array([[1, None, 3], None, None, []],
-                         type=pa.large_list(pa.int32()))
-        }),
+            "large_list<int32>": pa.array(
+                [[1, None, 3], None, None, []], type=pa.large_list(pa.int32())
+            )
+        },
+    ),
     dict(
         testcase_name="struct",
-        inputs=[{
-            "struct<binary, list<int32>>":
-                pa.StructArray.from_arrays([
-                    pa.array([b"abc", None, b"def"]),
-                    pa.array([[None], [1, 2], []], type=pa.list_(pa.int32()))
-                ], ["f1", "f2"])
-        }, {
-            "struct<binary, list<int32>>":
-                pa.StructArray.from_arrays([
-                    pa.array([b"ghi"]),
-                    pa.array([[3]], type=pa.list_(pa.int32()))
-                ], ["f1", "f2"])
-        }],
-        expected_output={
-            "struct<binary, list<int32>>":
-                pa.StructArray.from_arrays([
-                    pa.array([b"abc", None, b"def", b"ghi"]),
-                    pa.array([[None], [1, 2], [], [3]],
-                             type=pa.list_(pa.int32()))
-                ], ["f1", "f2"])
-        }),
-    dict(
-        testcase_name="missing_or_null_column_fixed_width",
         inputs=[
             {
-                "int32": pa.array([None, None], type=pa.null())
+                "struct<binary, list<int32>>": pa.StructArray.from_arrays(
+                    [
+                        pa.array([b"abc", None, b"def"]),
+                        pa.array([[None], [1, 2], []], type=pa.list_(pa.int32())),
+                    ],
+                    ["f1", "f2"],
+                )
             },
             {
-                "int64": pa.array([None, None], type=pa.null())
-            },
-            {
-                "int64": pa.array([123], type=pa.int64())
-            },
-            {
-                "int32": pa.array([456], type=pa.int32())
+                "struct<binary, list<int32>>": pa.StructArray.from_arrays(
+                    [pa.array([b"ghi"]), pa.array([[3]], type=pa.list_(pa.int32()))],
+                    ["f1", "f2"],
+                )
             },
         ],
         expected_output={
-            "int32":
-                pa.array([None, None, None, None, None, 456], type=pa.int32()),
-            "int64":
-                pa.array([None, None, None, None, 123, None], type=pa.int64()),
-        }),
+            "struct<binary, list<int32>>": pa.StructArray.from_arrays(
+                [
+                    pa.array([b"abc", None, b"def", b"ghi"]),
+                    pa.array([[None], [1, 2], [], [3]], type=pa.list_(pa.int32())),
+                ],
+                ["f1", "f2"],
+            )
+        },
+    ),
+    dict(
+        testcase_name="missing_or_null_column_fixed_width",
+        inputs=[
+            {"int32": pa.array([None, None], type=pa.null())},
+            {"int64": pa.array([None, None], type=pa.null())},
+            {"int64": pa.array([123], type=pa.int64())},
+            {"int32": pa.array([456], type=pa.int32())},
+        ],
+        expected_output={
+            "int32": pa.array([None, None, None, None, None, 456], type=pa.int32()),
+            "int64": pa.array([None, None, None, None, 123, None], type=pa.int64()),
+        },
+    ),
     dict(
         testcase_name="missing_or_null_column_list_alike",
         inputs=[
-            {
-                "list<int32>": pa.array([None, None], type=pa.null())
-            },
-            {
-                "utf8": pa.array([None, None], type=pa.null())
-            },
-            {
-                "utf8": pa.array([u"abc"], type=pa.utf8())
-            },
-            {
-                "list<int32>":
-                    pa.array([None, [123, 456]], type=pa.list_(pa.int32()))
-            },
+            {"list<int32>": pa.array([None, None], type=pa.null())},
+            {"utf8": pa.array([None, None], type=pa.null())},
+            {"utf8": pa.array(["abc"], type=pa.utf8())},
+            {"list<int32>": pa.array([None, [123, 456]], type=pa.list_(pa.int32()))},
         ],
         expected_output={
-            "list<int32>":
-                pa.array([None, None, None, None, None, None, [123, 456]],
-                         type=pa.list_(pa.int32())),
-            "utf8":
-                pa.array([None, None, None, None, u"abc", None, None],
-                         type=pa.utf8()),
-        }),
+            "list<int32>": pa.array(
+                [None, None, None, None, None, None, [123, 456]],
+                type=pa.list_(pa.int32()),
+            ),
+            "utf8": pa.array(
+                [None, None, None, None, "abc", None, None], type=pa.utf8()
+            ),
+        },
+    ),
     dict(
         testcase_name="missing_or_null_column_struct",
-        inputs=[{
-            "struct<int32, list<int32>>": pa.array([None, None], type=pa.null())
-        }, {
-            "list<utf8>": pa.array([None, None], type=pa.null())
-        }, {
-            "struct<int32, list<int32>>":
-                pa.StructArray.from_arrays([
-                    pa.array([1, 2, None], type=pa.int32()),
-                    pa.array([[1], None, [3, 4]], type=pa.list_(pa.int32()))
-                ], ["f1", "f2"])
-        }, {
-            "list<utf8>": pa.array([u"abc", None], type=pa.utf8())
-        }],
+        inputs=[
+            {"struct<int32, list<int32>>": pa.array([None, None], type=pa.null())},
+            {"list<utf8>": pa.array([None, None], type=pa.null())},
+            {
+                "struct<int32, list<int32>>": pa.StructArray.from_arrays(
+                    [
+                        pa.array([1, 2, None], type=pa.int32()),
+                        pa.array([[1], None, [3, 4]], type=pa.list_(pa.int32())),
+                    ],
+                    ["f1", "f2"],
+                )
+            },
+            {"list<utf8>": pa.array(["abc", None], type=pa.utf8())},
+        ],
         expected_output={
-            "list<utf8>":
-                pa.array(
-                    [None, None, None, None, None, None, None, u"abc", None],
-                    type=pa.utf8()),
-            "struct<int32, list<int32>>":
-                pa.array([
-                    None, None, None, None, (1, [1]), (2, None),
-                    (None, [3, 4]), None, None
+            "list<utf8>": pa.array(
+                [None, None, None, None, None, None, None, "abc", None], type=pa.utf8()
+            ),
+            "struct<int32, list<int32>>": pa.array(
+                [
+                    None,
+                    None,
+                    None,
+                    None,
+                    (1, [1]),
+                    (2, None),
+                    (None, [3, 4]),
+                    None,
+                    None,
                 ],
-                         type=pa.struct([
-                             pa.field("f1", pa.int32()),
-                             pa.field("f2", pa.list_(pa.int32()))
-                         ])),
-        }),
+                type=pa.struct(
+                    [pa.field("f1", pa.int32()), pa.field("f2", pa.list_(pa.int32()))]
+                ),
+            ),
+        },
+    ),
     dict(
         testcase_name="merge_list_of_null_and_list_of_list",
-        inputs=[{
-            "f": pa.array([[None, None], None], type=pa.list_(pa.null()))
-        }, {
-            "f": pa.array([[[123]], None], type=pa.list_(pa.list_(pa.int32())))
-        }],
+        inputs=[
+            {"f": pa.array([[None, None], None], type=pa.list_(pa.null()))},
+            {"f": pa.array([[[123]], None], type=pa.list_(pa.list_(pa.int32())))},
+        ],
         expected_output={
-            "f":
-                pa.array([[None, None], None, [[123]], None],
-                         type=pa.list_(pa.list_(pa.int32())))
-        }),
+            "f": pa.array(
+                [[None, None], None, [[123]], None], type=pa.list_(pa.list_(pa.int32()))
+            )
+        },
+    ),
     dict(
         testcase_name="merge_large_list_of_null_and_list_of_list",
-        inputs=[{
-            "f": pa.array([[None, None], None], type=pa.large_list(pa.null()))
-        }, {
-            "f": pa.array([[[123]], None],
-                          type=pa.large_list(pa.large_list(pa.int32())))
-        }],
+        inputs=[
+            {"f": pa.array([[None, None], None], type=pa.large_list(pa.null()))},
+            {
+                "f": pa.array(
+                    [[[123]], None], type=pa.large_list(pa.large_list(pa.int32()))
+                )
+            },
+        ],
         expected_output={
-            "f":
-                pa.array([[None, None], None, [[123]], None],
-                         type=pa.large_list(pa.large_list(pa.int32())))
-        }),
+            "f": pa.array(
+                [[None, None], None, [[123]], None],
+                type=pa.large_list(pa.large_list(pa.int32())),
+            )
+        },
+    ),
     dict(
         testcase_name="merge_sliced_list_of_null_and_list_of_list",
-        inputs=[{
-            "f": pa.array(
-                [None, [None, None], None], type=pa.list_(pa.null())).slice(1)
-        }, {
-            "f": pa.array([[[123]], None], type=pa.list_(pa.list_(pa.int32())))
-        }],
+        inputs=[
+            {
+                "f": pa.array(
+                    [None, [None, None], None], type=pa.list_(pa.null())
+                ).slice(1)
+            },
+            {"f": pa.array([[[123]], None], type=pa.list_(pa.list_(pa.int32())))},
+        ],
         expected_output={
-            "f":
-                pa.array([[None, None], None, [[123]], None],
-                         type=pa.list_(pa.list_(pa.int32())))
-        }),
+            "f": pa.array(
+                [[None, None], None, [[123]], None], type=pa.list_(pa.list_(pa.int32()))
+            )
+        },
+    ),
     dict(
         testcase_name="merge_list_of_list_and_list_of_null",
-        inputs=[{
-            "f": pa.array([[[123]], None], type=pa.list_(pa.list_(pa.int32())))
-        }, {
-            "f": pa.array([[None, None], None], type=pa.list_(pa.null()))
-        }],
+        inputs=[
+            {"f": pa.array([[[123]], None], type=pa.list_(pa.list_(pa.int32())))},
+            {"f": pa.array([[None, None], None], type=pa.list_(pa.null()))},
+        ],
         expected_output={
-            "f":
-                pa.array([[[123]], None, [None, None], None],
-                         type=pa.list_(pa.list_(pa.int32())))
-        }),
+            "f": pa.array(
+                [[[123]], None, [None, None], None], type=pa.list_(pa.list_(pa.int32()))
+            )
+        },
+    ),
     dict(
         testcase_name="merge_list_of_null_and_null",
-        inputs=[{
-            "f": pa.array([None], type=pa.null())
-        }, {
-            "f": pa.array([[None, None], None], type=pa.list_(pa.null()))
-        }],
+        inputs=[
+            {"f": pa.array([None], type=pa.null())},
+            {"f": pa.array([[None, None], None], type=pa.list_(pa.null()))},
+        ],
         expected_output={
             "f": pa.array([None, [None, None], None], type=pa.list_(pa.null()))
-        }),
+        },
+    ),
     dict(
         testcase_name="merge_compatible_struct_missing_field",
-        inputs=[{
-            "f": pa.array([{"a": [1]}, {"a": [2, 3]}]),
-        }, {
-            "f": pa.array([{"b": [1.0]}]),
-        }],
+        inputs=[
+            {
+                "f": pa.array([{"a": [1]}, {"a": [2, 3]}]),
+            },
+            {
+                "f": pa.array([{"b": [1.0]}]),
+            },
+        ],
         expected_output={
-            "f": pa.array([
-                {"a": [1], "b": None},
-                {"a": [2, 3], "b": None},
-                {"a": None, "b": [1.0]}])
-        }),
+            "f": pa.array(
+                [
+                    {"a": [1], "b": None},
+                    {"a": [2, 3], "b": None},
+                    {"a": None, "b": [1.0]},
+                ]
+            )
+        },
+    ),
     dict(
         testcase_name="merge_compatible_struct_null_type",
-        inputs=[{
-            "f":
-                pa.array([{"a": [[1]]}],
-                         type=pa.struct([
-                             pa.field("a",
-                                      pa.large_list(pa.large_list(pa.int32())))
-                         ])),
-        }, {
-            "f":
-                pa.array([{"a": None}, {"a": None}],
-                         type=pa.struct([pa.field("a", pa.null())])),
-        }],
+        inputs=[
+            {
+                "f": pa.array(
+                    [{"a": [[1]]}],
+                    type=pa.struct(
+                        [pa.field("a", pa.large_list(pa.large_list(pa.int32())))]
+                    ),
+                ),
+            },
+            {
+                "f": pa.array(
+                    [{"a": None}, {"a": None}],
+                    type=pa.struct([pa.field("a", pa.null())]),
+                ),
+            },
+        ],
         expected_output={
-            "f":
-                pa.array([{"a": [[1]]},
-                          {"a": None},
-                          {"a": None}],
-                         type=pa.struct([
-                             pa.field("a",
-                                      pa.large_list(pa.large_list(pa.int32())))
-                         ]))
-        }),
+            "f": pa.array(
+                [{"a": [[1]]}, {"a": None}, {"a": None}],
+                type=pa.struct(
+                    [pa.field("a", pa.large_list(pa.large_list(pa.int32())))]
+                ),
+            )
+        },
+    ),
     dict(
         testcase_name="merge_compatible_struct_in_struct",
-        inputs=[{
-            "f": pa.array([{}, {}]),
-        }, {
-            "f": pa.array([
-                {"a": [{"b": 1}]},
-                {"a": [{"b": 2}]},
-            ])
-        }, {
-            "f": pa.array([
-                {"a": [{"b": 3, "c": 1}]},
-            ])
-        }],
+        inputs=[
+            {
+                "f": pa.array([{}, {}]),
+            },
+            {
+                "f": pa.array(
+                    [
+                        {"a": [{"b": 1}]},
+                        {"a": [{"b": 2}]},
+                    ]
+                )
+            },
+            {
+                "f": pa.array(
+                    [
+                        {"a": [{"b": 3, "c": 1}]},
+                    ]
+                )
+            },
+        ],
         expected_output={
-            "f": pa.array([
-                {"a": None},
-                {"a": None},
-                {"a": [{"b": 1, "c": None}]},
-                {"a": [{"b": 2, "c": None}]},
-                {"a": [{"b": 3, "c": 1}]}])
-        })
+            "f": pa.array(
+                [
+                    {"a": None},
+                    {"a": None},
+                    {"a": [{"b": 1, "c": None}]},
+                    {"a": [{"b": 2, "c": None}]},
+                    {"a": [{"b": 3, "c": 1}]},
+                ]
+            )
+        },
+    ),
 ]
 
 _MERGE_INVALID_INPUT_TEST_CASES = [
     dict(
         testcase_name="column_type_differs",
         inputs=[
-            pa.RecordBatch.from_arrays([pa.array([1, 2, 3], type=pa.int32())],
-                                       ["f1"]),
-            pa.RecordBatch.from_arrays([pa.array([4, 5, 6], type=pa.int64())],
-                                       ["f1"])
+            pa.RecordBatch.from_arrays([pa.array([1, 2, 3], type=pa.int32())], ["f1"]),
+            pa.RecordBatch.from_arrays([pa.array([4, 5, 6], type=pa.int64())], ["f1"]),
         ],
-        expected_error_regexp="Unable to merge incompatible type"),
+        expected_error_regexp="Unable to merge incompatible type",
+    ),
 ]
 
 
 class MergeRecordBatchesTest(parameterized.TestCase):
-
-  @parameterized.named_parameters(*_MERGE_INVALID_INPUT_TEST_CASES)
-  def test_invalid_inputs(self, inputs, expected_error_regexp):
-    with self.assertRaisesRegex(Exception, expected_error_regexp):
-      _ = table_util.MergeRecordBatches(inputs)
-
-  @parameterized.named_parameters(*_MERGE_TEST_CASES)
-  def test_merge_record_batches(self, inputs, expected_output):
-    input_record_batches = [
-        pa.RecordBatch.from_arrays(list(in_dict.values()), list(in_dict.keys()))
-        for in_dict in inputs
-    ]
-    merged = table_util.MergeRecordBatches(input_record_batches)
-
-    self.assertLen(expected_output, merged.num_columns)
-    for column, column_name in zip(merged.columns, merged.schema.names):
-      self.assertTrue(
-          expected_output[column_name].equals(column),
-          "Column {}:\nexpected:{}\ngot: {}".format(
-              column_name, expected_output[column_name], column))
-
-  def test_merge_0_column_record_batches(self):
-    record_batches = ([
-        pa.table([pa.array([1, 2, 3])],
-                 ["ignore"]).remove_column(0).to_batches(max_chunksize=None)[0]
-    ] * 3)
-    merged = table_util.MergeRecordBatches(record_batches)
-    self.assertEqual(merged.num_rows, 9)
-    self.assertEqual(merged.num_columns, 0)
+    @parameterized.named_parameters(*_MERGE_INVALID_INPUT_TEST_CASES)
+    def test_invalid_inputs(self, inputs, expected_error_regexp):
+        with self.assertRaisesRegex(Exception, expected_error_regexp):
+            _ = table_util.MergeRecordBatches(inputs)
+
+    @parameterized.named_parameters(*_MERGE_TEST_CASES)
+    def test_merge_record_batches(self, inputs, expected_output):
+        input_record_batches = [
+            pa.RecordBatch.from_arrays(list(in_dict.values()), list(in_dict.keys()))
+            for in_dict in inputs
+        ]
+        merged = table_util.MergeRecordBatches(input_record_batches)
+
+        self.assertLen(expected_output, merged.num_columns)
+        for column, column_name in zip(merged.columns, merged.schema.names):
+            self.assertTrue(
+                expected_output[column_name].equals(column),
+                f"Column {column_name}:\nexpected:{expected_output[column_name]}\ngot: {column}",
+            )
+
+    def test_merge_0_column_record_batches(self):
+        record_batches = [
+            pa.table([pa.array([1, 2, 3])], ["ignore"])
+            .remove_column(0)
+            .to_batches(max_chunksize=None)[0]
+        ] * 3
+        merged = table_util.MergeRecordBatches(record_batches)
+        self.assertEqual(merged.num_rows, 9)
+        self.assertEqual(merged.num_columns, 0)
 
 
 _GET_TOTAL_BYTE_SIZE_TEST_NAMED_PARAMS = [
@@ -416,589 +425,601 @@ def test_merge_0_column_record_batches(self):
 
 
 class GetTotalByteSizeTest(parameterized.TestCase):
+    @parameterized.named_parameters(*_GET_TOTAL_BYTE_SIZE_TEST_NAMED_PARAMS)
+    def test_simple(self, factory):
+        # 3 int64 values
+        # 5 int32 offsets
+        # 1 null bitmap byte for outer ListArray
+        # 1 null bitmap byte for inner Int64Array
+        # 46 bytes in total.
+        list_array = pa.array([[1, 2], [None], None, None], type=pa.list_(pa.int64()))
+
+        # 1 null bitmap byte for outer StructArray.
+        # 1 null bitmap byte for inner Int64Array.
+        # 4 int64 values.
+        # 34 bytes in total
+        struct_array = pa.array(
+            [{"a": 1}, {"a": 2}, {"a": None}, None],
+            type=pa.struct([pa.field("a", pa.int64())]),
+        )
+        entity = factory([list_array, struct_array], ["a1", "a2"])
 
-  @parameterized.named_parameters(*_GET_TOTAL_BYTE_SIZE_TEST_NAMED_PARAMS)
-  def test_simple(self, factory):
-    # 3 int64 values
-    # 5 int32 offsets
-    # 1 null bitmap byte for outer ListArray
-    # 1 null bitmap byte for inner Int64Array
-    # 46 bytes in total.
-    list_array = pa.array([[1, 2], [None], None, None],
-                          type=pa.list_(pa.int64()))
-
-    # 1 null bitmap byte for outer StructArray.
-    # 1 null bitmap byte for inner Int64Array.
-    # 4 int64 values.
-    # 34 bytes in total
-    struct_array = pa.array([{"a": 1}, {"a": 2}, {"a": None}, None],
-                            type=pa.struct([pa.field("a", pa.int64())]))
-    entity = factory([list_array, struct_array], ["a1", "a2"])
-
-    self.assertEqual(46 + 34, table_util.TotalByteSize(entity))
+        self.assertEqual(46 + 34, table_util.TotalByteSize(entity))
 
 
 _TAKE_TEST_CASES = [
     dict(
         testcase_name="no_index",
         row_indices=[],
-        expected_output=pa.RecordBatch.from_arrays([
-            pa.array([], type=pa.list_(pa.int32())),
-            pa.array([], type=pa.list_(pa.binary()))
-        ], ["f1", "f2"])),
+        expected_output=pa.RecordBatch.from_arrays(
+            [
+                pa.array([], type=pa.list_(pa.int32())),
+                pa.array([], type=pa.list_(pa.binary())),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
         testcase_name="one_index",
         row_indices=[1],
-        expected_output=pa.RecordBatch.from_arrays([
-            pa.array([None], type=pa.list_(pa.int32())),
-            pa.array([["b", "c"]], type=pa.list_(pa.binary()))
-        ], ["f1", "f2"])),
+        expected_output=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None], type=pa.list_(pa.int32())),
+                pa.array([["b", "c"]], type=pa.list_(pa.binary())),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
         testcase_name="consecutive_first_row_included",
         row_indices=[0, 1, 2, 3],
         expected_output=pa.RecordBatch.from_arrays(
             [
                 pa.array([[1, 2, 3], None, [4], []], type=pa.list_(pa.int32())),
-                pa.array([["a"], ["b", "c"], None, []],
-                         type=pa.list_(pa.binary()))
+                pa.array([["a"], ["b", "c"], None, []], type=pa.list_(pa.binary())),
             ],
             ["f1", "f2"],
-        )),
+        ),
+    ),
     dict(
         testcase_name="consecutive_last_row_included",
         row_indices=[5, 6, 7, 8],
         expected_output=pa.RecordBatch.from_arrays(
             [
                 pa.array([[7], [8, 9], [10], []], type=pa.list_(pa.int32())),
-                pa.array([["d", "e"], ["f"], None, ["g"]],
-                         type=pa.list_(pa.binary()))
+                pa.array([["d", "e"], ["f"], None, ["g"]], type=pa.list_(pa.binary())),
             ],
             ["f1", "f2"],
-        )),
+        ),
+    ),
     dict(
         testcase_name="inconsecutive",
         row_indices=[1, 2, 3, 5],
         expected_output=pa.RecordBatch.from_arrays(
             [
                 pa.array([None, [4], [], [7]], type=pa.list_(pa.int32())),
-                pa.array([["b", "c"], None, [], ["d", "e"]],
-                         type=pa.list_(pa.binary()))
+                pa.array(
+                    [["b", "c"], None, [], ["d", "e"]], type=pa.list_(pa.binary())
+                ),
             ],
             ["f1", "f2"],
-        )),
+        ),
+    ),
     dict(
         testcase_name="inconsecutive_last_row_included",
         row_indices=[2, 3, 4, 5, 7, 8],
         expected_output=pa.RecordBatch.from_arrays(
             [
-                pa.array([[4], [], [5, 6], [7], [10], []],
-                         type=pa.list_(pa.int32())),
-                pa.array([None, [], None, ["d", "e"], None, ["g"]],
-                         type=pa.list_(pa.binary()))
+                pa.array([[4], [], [5, 6], [7], [10], []], type=pa.list_(pa.int32())),
+                pa.array(
+                    [None, [], None, ["d", "e"], None, ["g"]],
+                    type=pa.list_(pa.binary()),
+                ),
             ],
             ["f1", "f2"],
-        )),
+        ),
+    ),
 ]
 
 
 class RecordBatchTakeTest(parameterized.TestCase):
+    @parameterized.named_parameters(*_TAKE_TEST_CASES)
+    def test_success(self, row_indices, expected_output):
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[1, 2, 3], None, [4], [], [5, 6], [7], [8, 9], [10], []],
+                    type=pa.list_(pa.int32()),
+                ),
+                pa.array(
+                    [["a"], ["b", "c"], None, [], None, ["d", "e"], ["f"], None, ["g"]],
+                    type=pa.list_(pa.binary()),
+                ),
+            ],
+            ["f1", "f2"],
+        )
 
-  @parameterized.named_parameters(*_TAKE_TEST_CASES)
-  def test_success(self, row_indices, expected_output):
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.array([[1, 2, 3], None, [4], [], [5, 6], [7], [8, 9], [10], []],
-                 type=pa.list_(pa.int32())),
-        pa.array(
-            [["a"], ["b", "c"], None, [], None, ["d", "e"], ["f"], None, ["g"]],
-            type=pa.list_(pa.binary())),
-    ], ["f1", "f2"])
-
-    for row_indices_type in (pa.int32(), pa.int64()):
-      sliced = table_util.RecordBatchTake(
-          record_batch, pa.array(row_indices, type=row_indices_type))
-      self.assertTrue(
-          sliced.equals(expected_output),
-          "Expected {}, got {}".format(expected_output, sliced))
+        for row_indices_type in (pa.int32(), pa.int64()):
+            sliced = table_util.RecordBatchTake(
+                record_batch, pa.array(row_indices, type=row_indices_type)
+            )
+            self.assertTrue(
+                sliced.equals(expected_output),
+                f"Expected {expected_output}, got {sliced}",
+            )
 
 
 class CanonicalizeRecordBatchTest(parameterized.TestCase):
+    def test_canonicalize_record_batch(self):
+        rb_data = pa.RecordBatch.from_arrays(
+            [
+                pa.array([17, 30], pa.int32()),
+                pa.array(["english", "spanish"]),
+                pa.array([False, True]),
+                pa.array([False, True]),
+                pa.array([["ne"], ["s", "ted"]]),
+            ],
+            ["age", "language", "prediction", "label", "nested"],
+        )
 
-  def test_canonicalize_record_batch(self):
-    rb_data = pa.RecordBatch.from_arrays([
-        pa.array([17, 30], pa.int32()),
-        pa.array(["english", "spanish"]),
-        pa.array([False, True]),
-        pa.array([False, True]),
-        pa.array([["ne"], ["s", "ted"]])
-    ], ["age", "language", "prediction", "label", "nested"])
-
-    canonicalized_rb_data = table_util.CanonicalizeRecordBatch(rb_data)
-    self.assertEqual(canonicalized_rb_data.schema.names, rb_data.schema.names)
-
-    expected_age_column = pa.array([[17], [30]], type=pa.large_list(pa.int64()))
-    expected_language_column = pa.array([["english"], ["spanish"]],
-                                        type=pa.large_list(pa.large_binary()))
-    expected_prediction_column = pa.array([[0], [1]],
-                                          type=pa.large_list(pa.int8()))
-    expected_label_column = pa.array([[0], [1]], type=pa.large_list(pa.int8()))
-    expected_nested_column = pa.array([["ne"], ["s", "ted"]],
-                                      type=pa.large_list(pa.large_binary()))
-    self.assertTrue(
-        canonicalized_rb_data.column(
-            canonicalized_rb_data.schema.get_field_index("age")).equals(
-                expected_age_column))
-    self.assertTrue(
-        canonicalized_rb_data.column(
-            canonicalized_rb_data.schema.get_field_index("language")).equals(
-                expected_language_column))
-    self.assertTrue(
-        canonicalized_rb_data.column(
-            canonicalized_rb_data.schema.get_field_index("prediction")).equals(
-                expected_prediction_column))
-    self.assertTrue(
-        canonicalized_rb_data.column(
-            canonicalized_rb_data.schema.get_field_index("label")).equals(
-                expected_label_column))
-    self.assertTrue(
-        canonicalized_rb_data.column(
-            canonicalized_rb_data.schema.get_field_index("nested")).equals(
-                expected_nested_column))
-
-
-_INPUT_RECORD_BATCH = pa.RecordBatch.from_arrays([
-    pa.array([[1], [2, 3]]),
-    pa.array([[{
-        "sf1": ["a", "b"]
-    }], [{
-        "sf2": [{
-            "ssf1": [3]
-        }, {
-            "ssf1": [4]
-        }]
-    }]]),
-    pa.array([
-        {
-            "sf1": [[1, 2], [3]],
-            "sf2": [None],
-        },
-        None,
-    ]),
-], ["f1", "f2", "f3"])
+        canonicalized_rb_data = table_util.CanonicalizeRecordBatch(rb_data)
+        self.assertEqual(canonicalized_rb_data.schema.names, rb_data.schema.names)
 
+        expected_age_column = pa.array([[17], [30]], type=pa.large_list(pa.int64()))
+        expected_language_column = pa.array(
+            [["english"], ["spanish"]], type=pa.large_list(pa.large_binary())
+        )
+        expected_prediction_column = pa.array([[0], [1]], type=pa.large_list(pa.int8()))
+        expected_label_column = pa.array([[0], [1]], type=pa.large_list(pa.int8()))
+        expected_nested_column = pa.array(
+            [["ne"], ["s", "ted"]], type=pa.large_list(pa.large_binary())
+        )
+        self.assertTrue(
+            canonicalized_rb_data.column(
+                canonicalized_rb_data.schema.get_field_index("age")
+            ).equals(expected_age_column)
+        )
+        self.assertTrue(
+            canonicalized_rb_data.column(
+                canonicalized_rb_data.schema.get_field_index("language")
+            ).equals(expected_language_column)
+        )
+        self.assertTrue(
+            canonicalized_rb_data.column(
+                canonicalized_rb_data.schema.get_field_index("prediction")
+            ).equals(expected_prediction_column)
+        )
+        self.assertTrue(
+            canonicalized_rb_data.column(
+                canonicalized_rb_data.schema.get_field_index("label")
+            ).equals(expected_label_column)
+        )
+        self.assertTrue(
+            canonicalized_rb_data.column(
+                canonicalized_rb_data.schema.get_field_index("nested")
+            ).equals(expected_nested_column)
+        )
+
+
+_INPUT_RECORD_BATCH = pa.RecordBatch.from_arrays(
+    [
+        pa.array([[1], [2, 3]]),
+        pa.array([[{"sf1": ["a", "b"]}], [{"sf2": [{"ssf1": [3]}, {"ssf1": [4]}]}]]),
+        pa.array(
+            [
+                {
+                    "sf1": [[1, 2], [3]],
+                    "sf2": [None],
+                },
+                None,
+            ]
+        ),
+    ],
+    ["f1", "f2", "f3"],
+)
 
-ExpectedArray = collections.namedtuple(
-    "ExpectedArray", ["array", "parent_indices"])
+
+ExpectedArray = collections.namedtuple("ExpectedArray", ["array", "parent_indices"])
 _FEATURES_TO_ARRAYS = {
-    path.ColumnPath(["f1"]): ExpectedArray(
-        pa.array([[1], [2, 3]]), [0, 1]),
-    path.ColumnPath(["f2"]): ExpectedArray(pa.array([[{
-        "sf1": ["a", "b"]
-    }], [{
-        "sf2": [{
-            "ssf1": [3]
-        }, {
-            "ssf1": [4]
-        }]
-    }]]), [0, 1]),
-    path.ColumnPath(["f3"]): ExpectedArray(pa.array([{
-        "sf1": [[1, 2], [3]],
-        "sf2": [None],
-    }, None]), [0, 1]),
-    path.ColumnPath(["f2", "sf1"]): ExpectedArray(
-        pa.array([["a", "b"], None]), [0, 1]),
+    path.ColumnPath(["f1"]): ExpectedArray(pa.array([[1], [2, 3]]), [0, 1]),
+    path.ColumnPath(["f2"]): ExpectedArray(
+        pa.array([[{"sf1": ["a", "b"]}], [{"sf2": [{"ssf1": [3]}, {"ssf1": [4]}]}]]),
+        [0, 1],
+    ),
+    path.ColumnPath(["f3"]): ExpectedArray(
+        pa.array(
+            [
+                {
+                    "sf1": [[1, 2], [3]],
+                    "sf2": [None],
+                },
+                None,
+            ]
+        ),
+        [0, 1],
+    ),
+    path.ColumnPath(["f2", "sf1"]): ExpectedArray(pa.array([["a", "b"], None]), [0, 1]),
     path.ColumnPath(["f2", "sf2"]): ExpectedArray(
-        pa.array([None, [{
-            "ssf1": [3]
-        }, {
-            "ssf1": [4]
-        }]]), [0, 1]),
-    path.ColumnPath(["f2", "sf2", "ssf1"]): ExpectedArray(
-        pa.array([[3], [4]]), [1, 1]),
-    path.ColumnPath(["f3", "sf1"]): ExpectedArray(pa.array(
-        [[[1, 2], [3]], None]), [0, 1]),
-    path.ColumnPath(["f3", "sf2"]): ExpectedArray(
-        pa.array([[None], None]), [0, 1]),
+        pa.array([None, [{"ssf1": [3]}, {"ssf1": [4]}]]), [0, 1]
+    ),
+    path.ColumnPath(["f2", "sf2", "ssf1"]): ExpectedArray(pa.array([[3], [4]]), [1, 1]),
+    path.ColumnPath(["f3", "sf1"]): ExpectedArray(
+        pa.array([[[1, 2], [3]], None]), [0, 1]
+    ),
+    path.ColumnPath(["f3", "sf2"]): ExpectedArray(pa.array([[None], None]), [0, 1]),
 }
 
 
 class EnumerateStructNullValueTestData(NamedTuple):
-  """Inputs and outputs for enumeration with pa.StructArrays with null values."""
-  description: str
-  """Summary of test"""
-  batch: pa.RecordBatch
-  """Input Record Batch"""
-  expected_results: Dict[path.ColumnPath, pa.array]
-  """The expected output."""
-
-
-def _make_enumerate_data_with_missing_data_at_leaves(
-    ) -> Iterable[EnumerateStructNullValueTestData]:
-  """Test that having only nulls at leaf values gets translated correctly."""
-  test_data_type = pa.list_(pa.struct([("f2", pa.list_(pa.float64()))]))
-  struct_column_as_list_dicts = [
-      [],  # first element of 'c'; note this is not counted as missing.
-      [  # second element of 'c' -- a list<struct> of length 2.
-          {
-              "f2": [2.0],
-          },
-          None,  # f2 is missing
-      ],
-      [  # third element of 'c'
-          None,  # f2 is missing
-      ],
-      [],  # fourth element of 'c'; note this is not counted as missing.
-  ]
-
-  array = pa.array(struct_column_as_list_dicts, type=test_data_type)
-
-  batch = pa.RecordBatch.from_arrays([array], ["c"])
-
-  full_expected_results = {
-      path.ColumnPath(["c"]):
-          pa.array([[], [{
-              "f2": [2.0]
-          }, None], [None], []]),
-      path.ColumnPath(["c", "f2"]):
-          pa.array([[2.0], None, None]),
-  }
-  yield "Basic", batch, full_expected_results
-
-
-def _make_enumerate_test_data_with_null_values_and_sliced_batches(
-    ) -> Iterable[EnumerateStructNullValueTestData]:
-  """Yields test data for sliced data where all slicing is consistent.
-
-  Pyarrow slices with zero copy, sometimes subtle bugs can
-  arise when processing sliced data.
-  """
-  test_data_type = pa.list_(pa.struct([("f2", pa.list_(pa.float64()))]))
-  struct_column_as_list_dicts = [
-      [],  # first element of 'c'; note this is not counted as missing.
-      [  # second element of 'c' -- a list<struct> of length 2.
-          {
-              "f2": [2.0],
-          },
-          None,  # f2 is missing
-      ],
-      [  # third element of 'c'
-          None,  # f2 is missing
-      ],
-      [],  # fourth element of 'c'; note this is not counted as missing.
-  ]
-
-  array = pa.array(struct_column_as_list_dicts, type=test_data_type)
-
-  batch = pa.RecordBatch.from_arrays([array], ["c"])
-  slice_start, slice_end = 1, 3
-  batch = pa.RecordBatch.from_arrays([array[slice_start:slice_end]], ["c"])
-
-  sliced_expected_results = {
-      path.ColumnPath(["c"]): pa.array([[{
-          "f2": [2.0]
-      }, None], [None]]),
-      path.ColumnPath(["c", "f2"]): pa.array([[2.0], None, None]),
-  }
-  # Test case 1: slicing the array.
-  yield "SlicedArray", batch, sliced_expected_results
-
-  batch = pa.RecordBatch.from_arrays([array], ["c"])[slice_start:slice_end]
-  # Test case 2: slicing the RecordBatch.
-  yield "SlicedRecordBatch", batch, sliced_expected_results
-
-
-def _make_enumerate_test_data_with_null_top_level(
-    ) -> Iterable[EnumerateStructNullValueTestData]:
-  """Yields test data with a top level list element is missing."""
-  test_data_type = pa.list_(pa.struct([("f2", pa.list_(pa.float64()))]))
-  struct_column_as_list_dicts = [
-      [],  # first element of 'c'; note this is not counted as missing.
-      None,  # c is missing.
-      [   # third element of 'c'
-          None,  # f2 is missing
-      ],
-      [],  # fourth element of 'c'; note this is not counted as missing.
-  ]
-  array = pa.array(
-      struct_column_as_list_dicts, type=test_data_type)
-  validity_buffer_with_null = array.buffers()[0]
-  array_with_null_indicator = pa.Array.from_buffers(
-      array.type,
-      len(array) + array.offset,
-      [validity_buffer_with_null, array.buffers()[1]],
-      offset=0,
-      children=[array.values])
-  batch_with_missing_entry = pa.RecordBatch.from_arrays(
-      [array_with_null_indicator], ["c"])
-  missing_expected_results = {
-      path.ColumnPath(["c"]):
-          pa.array([[], None, [None], []], type=test_data_type),
-      path.ColumnPath(["c", "f2"]):
-          pa.array([None], type=pa.list_(pa.float64())),
-  }
-  yield ("ValuesPresentWithNullIndicator", batch_with_missing_entry,
-         missing_expected_results)
-
-
-def _make_enumerate_test_data_with_slices_at_different_offsets(
-    ) -> Iterable[EnumerateStructNullValueTestData]:
-  """Yields a test cases constructed from array slices with different offsets.
-
-  Slicing in pyarrow is zero copy, which can have subtle bugs, so ensure
-  the code works under more obscure situations.
-  """
-  total_size = 10
-  values_array = pa.array(range(total_size), type=pa.int64())
-  # create 5 pyarrow.Array object each of size from the original array ([0,1],
-  # [2,3], etc
-  slices = [
-      values_array[start:end] for (start, end)
-      in zip(range(0, total_size + 1, 2), range(2, total_size + 1, 2))
-  ]  # pyformat: disable
-  validity = pa.array([True, False], type=pa.bool_())
-  # Label fields from "0" to "5"
-  new_type = pa.struct([pa.field(str(sl[0].as_py() // 2), sl.type)
-                        for sl in slices])
-  # Using the value buffer of validity as composed_struct's validity bitmap
-  # buffer.
-  composed_struct = pa.StructArray.from_buffers(
-      new_type, len(slices[0]), [validity.buffers()[1]], children=slices)
-  sliced_batch = pa.RecordBatch.from_arrays([composed_struct], ["c"])
-  sliced_expected_results = {
-      path.ColumnPath(["c"]):
-          pa.array([
-              [{"0": 0, "1": 2, "2": 4, "3": 6, "4": 8}],
-              None,
-          ]),
-      path.ColumnPath(["c", "0"]): pa.array([0, None], type=pa.int64()),
-      path.ColumnPath(["c", "1"]): pa.array([2, None], type=pa.int64()),
-      path.ColumnPath(["c", "2"]): pa.array([4, None], type=pa.int64()),
-      path.ColumnPath(["c", "3"]): pa.array([6, None], type=pa.int64()),
-      path.ColumnPath(["c", "4"]): pa.array([8, None], type=pa.int64()),
-  }  # pyformat: disable
-  yield ("SlicedArrayWithOffests", sliced_batch, sliced_expected_results)
-
+    """Inputs and outputs for enumeration with pa.StructArrays with null values."""
+
+    description: str
+    """Summary of test"""
+    batch: pa.RecordBatch
+    """Input Record Batch"""
+    expected_results: Dict[path.ColumnPath, pa.array]
+    """The expected output."""
+
+
+def _make_enumerate_data_with_missing_data_at_leaves() -> (
+    Iterable[EnumerateStructNullValueTestData]
+):
+    """Test that having only nulls at leaf values gets translated correctly."""
+    test_data_type = pa.list_(pa.struct([("f2", pa.list_(pa.float64()))]))
+    struct_column_as_list_dicts = [
+        [],  # first element of 'c'; note this is not counted as missing.
+        [  # second element of 'c' -- a list<struct> of length 2.
+            {
+                "f2": [2.0],
+            },
+            None,  # f2 is missing
+        ],
+        [  # third element of 'c'
+            None,  # f2 is missing
+        ],
+        [],  # fourth element of 'c'; note this is not counted as missing.
+    ]
 
-def _normalize(array: pa.Array) -> pa.Array:
-  """Round trips array through python objects.
+    array = pa.array(struct_column_as_list_dicts, type=test_data_type)
 
-  Comparing nested arrays with slices is buggy in Arrow 2.0 this method
-  is useful comparing two such arrays for logical equality. The bugs
-  appears to be fixed as of Arrow 5.0 this should be removable once that
-  becomes the minimum version.
+    batch = pa.RecordBatch.from_arrays([array], ["c"])
 
-  Args:
-    array: The array to normalize.
+    full_expected_results = {
+        path.ColumnPath(["c"]): pa.array([[], [{"f2": [2.0]}, None], [None], []]),
+        path.ColumnPath(["c", "f2"]): pa.array([[2.0], None, None]),
+    }
+    yield "Basic", batch, full_expected_results
 
-  Returns:
-    An array that doesn't have any more zero copy slices in itself or
-    it's children. Note the schema might be slightly different for
-    all null arrays.
-  """
-  return pa.array(array.to_pylist())
 
+def _make_enumerate_test_data_with_null_values_and_sliced_batches() -> (
+    Iterable[EnumerateStructNullValueTestData]
+):
+    """Yields test data for sliced data where all slicing is consistent.
 
-class TableUtilTest(parameterized.TestCase):
+    Pyarrow slices with zero copy, sometimes subtle bugs can
+    arise when processing sliced data.
+    """
+    test_data_type = pa.list_(pa.struct([("f2", pa.list_(pa.float64()))]))
+    struct_column_as_list_dicts = [
+        [],  # first element of 'c'; note this is not counted as missing.
+        [  # second element of 'c' -- a list<struct> of length 2.
+            {
+                "f2": [2.0],
+            },
+            None,  # f2 is missing
+        ],
+        [  # third element of 'c'
+            None,  # f2 is missing
+        ],
+        [],  # fourth element of 'c'; note this is not counted as missing.
+    ]
 
-  def test_get_array_empty_path(self):
-    with self.assertRaisesRegex(KeyError, r"query_path must be non-empty.*"):
-      table_util.get_array(
-          pa.RecordBatch.from_arrays([pa.array([[1], [2, 3]])], ["v"]),
-          query_path=path.ColumnPath([]),
-          return_example_indices=False,
-      )
-
-  def test_get_array_column_missing(self):
-    with self.assertRaisesRegex(
-        KeyError, r"query_path step 0 \(x\) not in record batch.*"
-    ):
-      table_util.get_array(
-          pa.RecordBatch.from_arrays([pa.array([[1], [2]])], ["y"]),
-          query_path=path.ColumnPath(["x"]),
-          return_example_indices=False,
-      )
-
-  def test_get_array_step_missing(self):
-    with self.assertRaisesRegex(
-        KeyError, r"query_path step \(ssf3\) not in struct.*"
-    ):
-      table_util.get_array(
-          _INPUT_RECORD_BATCH,
-          query_path=path.ColumnPath(["f2", "sf2", "ssf3"]),
-          return_example_indices=False,
-      )
-
-  def test_get_array_return_example_indices(self):
-    record_batch = pa.RecordBatch.from_arrays(
-        [
-            pa.array([
-                [{"sf": [{"ssf": [1]}, {"ssf": [2]}]}],
-                [{"sf": [{"ssf": [3, 4]}]}],
-            ]),
-            pa.array([["one"], ["two"]]),
+    array = pa.array(struct_column_as_list_dicts, type=test_data_type)
+
+    batch = pa.RecordBatch.from_arrays([array], ["c"])
+    slice_start, slice_end = 1, 3
+    batch = pa.RecordBatch.from_arrays([array[slice_start:slice_end]], ["c"])
+
+    sliced_expected_results = {
+        path.ColumnPath(["c"]): pa.array([[{"f2": [2.0]}, None], [None]]),
+        path.ColumnPath(["c", "f2"]): pa.array([[2.0], None, None]),
+    }
+    # Test case 1: slicing the array.
+    yield "SlicedArray", batch, sliced_expected_results
+
+    batch = pa.RecordBatch.from_arrays([array], ["c"])[slice_start:slice_end]
+    # Test case 2: slicing the RecordBatch.
+    yield "SlicedRecordBatch", batch, sliced_expected_results
+
+
+def _make_enumerate_test_data_with_null_top_level() -> (
+    Iterable[EnumerateStructNullValueTestData]
+):
+    """Yields test data with a top level list element is missing."""
+    test_data_type = pa.list_(pa.struct([("f2", pa.list_(pa.float64()))]))
+    struct_column_as_list_dicts = [
+        [],  # first element of 'c'; note this is not counted as missing.
+        None,  # c is missing.
+        [  # third element of 'c'
+            None,  # f2 is missing
         ],
-        ["f", "w"],
+        [],  # fourth element of 'c'; note this is not counted as missing.
+    ]
+    array = pa.array(struct_column_as_list_dicts, type=test_data_type)
+    validity_buffer_with_null = array.buffers()[0]
+    array_with_null_indicator = pa.Array.from_buffers(
+        array.type,
+        len(array) + array.offset,
+        [validity_buffer_with_null, array.buffers()[1]],
+        offset=0,
+        children=[array.values],
     )
-    feature = path.ColumnPath(["f", "sf", "ssf"])
-    actual_arr, actual_indices = table_util.get_array(
-        record_batch, feature, return_example_indices=True
+    batch_with_missing_entry = pa.RecordBatch.from_arrays(
+        [array_with_null_indicator], ["c"]
     )
-    expected_arr = pa.array([[1], [2], [3, 4]])
-    expected_indices = np.array([0, 0, 1])
-    self.assertTrue(
-        actual_arr.equals(expected_arr),
-        "\nfeature: {};\nexpected:\n{};\nactual:\n{}".format(
-            feature, expected_arr, actual_arr
-        ),
+    missing_expected_results = {
+        path.ColumnPath(["c"]): pa.array([[], None, [None], []], type=test_data_type),
+        path.ColumnPath(["c", "f2"]): pa.array([None], type=pa.list_(pa.float64())),
+    }
+    yield (
+        "ValuesPresentWithNullIndicator",
+        batch_with_missing_entry,
+        missing_expected_results,
     )
-    np.testing.assert_array_equal(expected_indices, actual_indices)
 
-  def test_get_array_subpath_missing(self):
-    with self.assertRaisesRegex(
-        KeyError, r"Cannot process .* \(sssf\) inside .* list<item: int64>.*"
-    ):
-      table_util.get_array(
-          _INPUT_RECORD_BATCH,
-          query_path=path.ColumnPath(["f2", "sf2", "ssf1", "sssf"]),
-          return_example_indices=False,
-      )
-
-  @parameterized.named_parameters(
-      ((str(f), f, expected) for (f, expected) in _FEATURES_TO_ARRAYS.items())
-  )
-  def test_get_array(self, feature, expected):
-    actual_arr, actual_indices = table_util.get_array(
-        _INPUT_RECORD_BATCH,
-        feature,
-        return_example_indices=True,
-        wrap_flat_struct_in_list=False,
-    )
-    expected_arr, expected_indices = expected
-    self.assertTrue(
-        actual_arr.equals(expected_arr),
-        "\nfeature: {};\nexpected:\n{};\nactual:\n{}".format(
-            feature, expected_arr, actual_arr
-        ),
-    )
-    np.testing.assert_array_equal(expected_indices, actual_indices)
-
-  @parameterized.named_parameters(
-      ((str(f), f, expected) for (f, expected) in _FEATURES_TO_ARRAYS.items())
-  )
-  def test_get_array_no_broadcast(self, feature, expected):
-    actual_arr, actual_indices = table_util.get_array(
-        _INPUT_RECORD_BATCH,
-        feature,
-        return_example_indices=False,
-        wrap_flat_struct_in_list=False,
+
+def _make_enumerate_test_data_with_slices_at_different_offsets() -> (
+    Iterable[EnumerateStructNullValueTestData]
+):
+    """Yields a test cases constructed from array slices with different offsets.
+
+    Slicing in pyarrow is zero copy, which can have subtle bugs, so ensure
+    the code works under more obscure situations.
+    """
+    total_size = 10
+    values_array = pa.array(range(total_size), type=pa.int64())
+    # create 5 pyarrow.Array object each of size from the original array ([0,1],
+    # [2,3], etc
+    slices = [
+        values_array[start:end]
+        for (start, end) in zip(
+            range(0, total_size + 1, 2), range(2, total_size + 1, 2)
+        )
+    ]  # pyformat: disable
+    validity = pa.array([True, False], type=pa.bool_())
+    # Label fields from "0" to "5"
+    new_type = pa.struct([pa.field(str(sl[0].as_py() // 2), sl.type) for sl in slices])
+    # Using the value buffer of validity as composed_struct's validity bitmap
+    # buffer.
+    composed_struct = pa.StructArray.from_buffers(
+        new_type, len(slices[0]), [validity.buffers()[1]], children=slices
     )
-    expected_arr, _ = expected
-    self.assertTrue(
-        actual_arr.equals(expected_arr),
-        "\nfeature: {};\nexpected:\n{};\nactual:\n{}".format(
-            feature, expected_arr, actual_arr
+    sliced_batch = pa.RecordBatch.from_arrays([composed_struct], ["c"])
+    sliced_expected_results = {
+        path.ColumnPath(["c"]): pa.array(
+            [
+                [{"0": 0, "1": 2, "2": 4, "3": 6, "4": 8}],
+                None,
+            ]
         ),
+        path.ColumnPath(["c", "0"]): pa.array([0, None], type=pa.int64()),
+        path.ColumnPath(["c", "1"]): pa.array([2, None], type=pa.int64()),
+        path.ColumnPath(["c", "2"]): pa.array([4, None], type=pa.int64()),
+        path.ColumnPath(["c", "3"]): pa.array([6, None], type=pa.int64()),
+        path.ColumnPath(["c", "4"]): pa.array([8, None], type=pa.int64()),
+    }  # pyformat: disable
+    yield ("SlicedArrayWithOffests", sliced_batch, sliced_expected_results)
+
+
+def _normalize(array: pa.Array) -> pa.Array:
+    """Round trips array through python objects.
+
+    Comparing nested arrays with slices is buggy in Arrow 2.0 this method
+    is useful comparing two such arrays for logical equality. The bugs
+    appears to be fixed as of Arrow 5.0 this should be removable once that
+    becomes the minimum version.
+
+    Args:
+    ----
+      array: The array to normalize.
+
+    Returns:
+    -------
+      An array that doesn't have any more zero copy slices in itself or
+      it's children. Note the schema might be slightly different for
+      all null arrays.
+    """
+    return pa.array(array.to_pylist())
+
+
+class TableUtilTest(parameterized.TestCase):
+    def test_get_array_empty_path(self):
+        with self.assertRaisesRegex(KeyError, r"query_path must be non-empty.*"):
+            table_util.get_array(
+                pa.RecordBatch.from_arrays([pa.array([[1], [2, 3]])], ["v"]),
+                query_path=path.ColumnPath([]),
+                return_example_indices=False,
+            )
+
+    def test_get_array_column_missing(self):
+        with self.assertRaisesRegex(
+            KeyError, r"query_path step 0 \(x\) not in record batch.*"
+        ):
+            table_util.get_array(
+                pa.RecordBatch.from_arrays([pa.array([[1], [2]])], ["y"]),
+                query_path=path.ColumnPath(["x"]),
+                return_example_indices=False,
+            )
+
+    def test_get_array_step_missing(self):
+        with self.assertRaisesRegex(
+            KeyError, r"query_path step \(ssf3\) not in struct.*"
+        ):
+            table_util.get_array(
+                _INPUT_RECORD_BATCH,
+                query_path=path.ColumnPath(["f2", "sf2", "ssf3"]),
+                return_example_indices=False,
+            )
+
+    def test_get_array_return_example_indices(self):
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [
+                        [{"sf": [{"ssf": [1]}, {"ssf": [2]}]}],
+                        [{"sf": [{"ssf": [3, 4]}]}],
+                    ]
+                ),
+                pa.array([["one"], ["two"]]),
+            ],
+            ["f", "w"],
+        )
+        feature = path.ColumnPath(["f", "sf", "ssf"])
+        actual_arr, actual_indices = table_util.get_array(
+            record_batch, feature, return_example_indices=True
+        )
+        expected_arr = pa.array([[1], [2], [3, 4]])
+        expected_indices = np.array([0, 0, 1])
+        self.assertTrue(
+            actual_arr.equals(expected_arr),
+            f"\nfeature: {feature};\nexpected:\n{expected_arr};\nactual:\n{actual_arr}",
+        )
+        np.testing.assert_array_equal(expected_indices, actual_indices)
+
+    def test_get_array_subpath_missing(self):
+        with self.assertRaisesRegex(
+            KeyError, r"Cannot process .* \(sssf\) inside .* list<item: int64>.*"
+        ):
+            table_util.get_array(
+                _INPUT_RECORD_BATCH,
+                query_path=path.ColumnPath(["f2", "sf2", "ssf1", "sssf"]),
+                return_example_indices=False,
+            )
+
+    @parameterized.named_parameters(
+        (str(f), f, expected) for (f, expected) in _FEATURES_TO_ARRAYS.items()
     )
-    self.assertIsNone(actual_indices)
-
-  @parameterized.named_parameters(
-      ((str(f), f, expected) for (f, expected) in _FEATURES_TO_ARRAYS.items())
-  )
-  def test_get_array_wrap_flat_struct_array(self, feature, expected):
-    actual_arr, actual_indices = table_util.get_array(
-        _INPUT_RECORD_BATCH,
-        feature,
-        return_example_indices=True,
-        wrap_flat_struct_in_list=True,
-    )
-    expected_arr, expected_indices = expected
-    if pa.types.is_struct(expected_arr.type):
-      expected_arr = array_util.ToSingletonListArray(expected_arr)
-    self.assertTrue(
-        actual_arr.equals(expected_arr),
-        "\nfeature: {};\nexpected:\n{};\nactual:\n{}".format(
-            feature, expected_arr, actual_arr
-        ),
+    def test_get_array(self, feature, expected):
+        actual_arr, actual_indices = table_util.get_array(
+            _INPUT_RECORD_BATCH,
+            feature,
+            return_example_indices=True,
+            wrap_flat_struct_in_list=False,
+        )
+        expected_arr, expected_indices = expected
+        self.assertTrue(
+            actual_arr.equals(expected_arr),
+            f"\nfeature: {feature};\nexpected:\n{expected_arr};\nactual:\n{actual_arr}",
+        )
+        np.testing.assert_array_equal(expected_indices, actual_indices)
+
+    @parameterized.named_parameters(
+        (str(f), f, expected) for (f, expected) in _FEATURES_TO_ARRAYS.items()
     )
-    np.testing.assert_array_equal(expected_indices, actual_indices)
+    def test_get_array_no_broadcast(self, feature, expected):
+        actual_arr, actual_indices = table_util.get_array(
+            _INPUT_RECORD_BATCH,
+            feature,
+            return_example_indices=False,
+            wrap_flat_struct_in_list=False,
+        )
+        expected_arr, _ = expected
+        self.assertTrue(
+            actual_arr.equals(expected_arr),
+            f"\nfeature: {feature};\nexpected:\n{expected_arr};\nactual:\n{actual_arr}",
+        )
+        self.assertIsNone(actual_indices)
 
-  def test_enumerate_arrays(self):
-    for leaves_only, wrap_flat_struct_in_list in itertools.product(
-        [True, False], [True, False]
-    ):
-      actual_results = {}
-      for feature_path, feature_array in table_util.enumerate_arrays(
-          _INPUT_RECORD_BATCH,
-          leaves_only,
-          wrap_flat_struct_in_list,
-      ):
-        actual_results[feature_path] = feature_array
-
-      expected_results = {}
-      # leaf fields
-      for p in [
-          ["f1"],
-          ["f2", "sf1"],
-          ["f2", "sf2", "ssf1"],
-          ["f3", "sf1"],
-          ["f3", "sf2"],
-      ]:
-        feature_path = path.ColumnPath(p)
-        expected_results[feature_path] = (
-            _FEATURES_TO_ARRAYS[feature_path].array
+    @parameterized.named_parameters(
+        (str(f), f, expected) for (f, expected) in _FEATURES_TO_ARRAYS.items()
+    )
+    def test_get_array_wrap_flat_struct_array(self, feature, expected):
+        actual_arr, actual_indices = table_util.get_array(
+            _INPUT_RECORD_BATCH,
+            feature,
+            return_example_indices=True,
+            wrap_flat_struct_in_list=True,
         )
-      if not leaves_only:
-        for p in [["f2"], ["f2", "sf2"], ["f3"]]:
-          feature_path = path.ColumnPath(p)
-          expected_array = _FEATURES_TO_ARRAYS[feature_path][0]
-          if wrap_flat_struct_in_list and pa.types.is_struct(
-              expected_array.type
-          ):
-            expected_array = array_util.ToSingletonListArray(expected_array)
-          expected_results[feature_path] = expected_array
-
-      self.assertLen(actual_results, len(expected_results))
-      for k, v in six.iteritems(expected_results):
-        self.assertIn(k, actual_results)
-        actual = actual_results[k]
+        expected_arr, expected_indices = expected
+        if pa.types.is_struct(expected_arr.type):
+            expected_arr = array_util.ToSingletonListArray(expected_arr)
         self.assertTrue(
-            actual[0].equals(v[0]),
-            "leaves_only={}; "
-            "wrap_flat_struct_in_list={} feature={}; expected: {}; actual: {}"
-            .format(
-                leaves_only, wrap_flat_struct_in_list, k, v, actual
-            ),
+            actual_arr.equals(expected_arr),
+            f"\nfeature: {feature};\nexpected:\n{expected_arr};\nactual:\n{actual_arr}",
+        )
+        np.testing.assert_array_equal(expected_indices, actual_indices)
+
+    def test_enumerate_arrays(self):
+        for leaves_only, wrap_flat_struct_in_list in itertools.product(
+            [True, False], [True, False]
+        ):
+            actual_results = {}
+            for feature_path, feature_array in table_util.enumerate_arrays(
+                _INPUT_RECORD_BATCH,
+                leaves_only,
+                wrap_flat_struct_in_list,
+            ):
+                actual_results[feature_path] = feature_array
+
+            expected_results = {}
+            # leaf fields
+            for p in [
+                ["f1"],
+                ["f2", "sf1"],
+                ["f2", "sf2", "ssf1"],
+                ["f3", "sf1"],
+                ["f3", "sf2"],
+            ]:
+                feature_path = path.ColumnPath(p)
+                expected_results[feature_path] = _FEATURES_TO_ARRAYS[feature_path].array
+            if not leaves_only:
+                for p in [["f2"], ["f2", "sf2"], ["f3"]]:
+                    feature_path = path.ColumnPath(p)
+                    expected_array = _FEATURES_TO_ARRAYS[feature_path][0]
+                    if wrap_flat_struct_in_list and pa.types.is_struct(
+                        expected_array.type
+                    ):
+                        expected_array = array_util.ToSingletonListArray(expected_array)
+                    expected_results[feature_path] = expected_array
+
+            self.assertLen(actual_results, len(expected_results))
+            for k, v in six.iteritems(expected_results):
+                self.assertIn(k, actual_results)
+                actual = actual_results[k]
+                self.assertTrue(
+                    actual[0].equals(v[0]),
+                    f"leaves_only={leaves_only}; "
+                    f"wrap_flat_struct_in_list={wrap_flat_struct_in_list} feature={k}; expected: {v}; actual: {actual}",
+                )
+                np.testing.assert_array_equal(actual[1], v[1])
+
+    @parameterized.named_parameters(
+        itertools.chain(
+            _make_enumerate_data_with_missing_data_at_leaves(),
+            _make_enumerate_test_data_with_null_values_and_sliced_batches(),
+            _make_enumerate_test_data_with_null_top_level(),
+            _make_enumerate_test_data_with_slices_at_different_offsets(),
         )
-        np.testing.assert_array_equal(actual[1], v[1])
-
-  @parameterized.named_parameters(
-      itertools.chain(
-          _make_enumerate_data_with_missing_data_at_leaves(),
-          _make_enumerate_test_data_with_null_values_and_sliced_batches(),
-          _make_enumerate_test_data_with_null_top_level(),
-          _make_enumerate_test_data_with_slices_at_different_offsets(),
-      )
-  )
-  def test_enumerate_missing_propogated_in_flattened_struct(
-      self, batch, expected_results
-  ):
-    actual_results = {}
-    for feature_path, feature_array in table_util.enumerate_arrays(
-        batch, enumerate_leaves_only=False
+    )
+    def test_enumerate_missing_propogated_in_flattened_struct(
+        self, batch, expected_results
     ):
-      actual_results[feature_path] = feature_array
-    self.assertLen(actual_results, len(expected_results))
-    for k, v in six.iteritems(expected_results):
-      assert k in actual_results, (k, list(actual_results.keys()))
-      self.assertIn(k, actual_results)
-      actual = _normalize(actual_results[k])
-      v = _normalize(v)
-      self.assertTrue(
-          actual.equals(v),
-          "feature={}; expected: {}; actual: {}; diff: {}".format(
-              k, v, actual, actual.diff(v)
-          ),
-      )
+        actual_results = {}
+        for feature_path, feature_array in table_util.enumerate_arrays(
+            batch, enumerate_leaves_only=False
+        ):
+            actual_results[feature_path] = feature_array
+        self.assertLen(actual_results, len(expected_results))
+        for k, v in six.iteritems(expected_results):
+            assert k in actual_results, (k, list(actual_results.keys()))
+            self.assertIn(k, actual_results)
+            actual = _normalize(actual_results[k])
+            v = _normalize(v)
+            self.assertTrue(
+                actual.equals(v),
+                f"feature={k}; expected: {v}; actual: {actual}; diff: {actual.diff(v)}",
+            )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/beam/pickle_helpers.py b/tfx_bsl/beam/pickle_helpers.py
index 6b64e41d..e572cf57 100644
--- a/tfx_bsl/beam/pickle_helpers.py
+++ b/tfx_bsl/beam/pickle_helpers.py
@@ -21,121 +21,121 @@
 # TODO(b/281148738): Remove this once all supported Beam versions depend on dill
 # with updated pickling logic or this is fixed in Beam.
 def fix_code_type_pickling() -> None:
-  """Overrides `CodeType` pickling to prevent segfaults in Python 3.10."""
-  # Based on the `save_code` from dill-0.3.6.
-  # https://github.com/uqfoundation/dill/blob/d5c4dccbe19fb27bfd757cb60abd2899fd9e59ba/dill/_dill.py#L1105
-  # Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
-  # Copyright (c) 2008-2015 California Institute of Technology.
-  # Copyright (c) 2016-2023 The Uncertainty Quantification Foundation.
-  # License: 3-clause BSD.  The full license text is available at:
-  #  - https://github.com/uqfoundation/dill/blob/master/LICENSE
+    """Overrides `CodeType` pickling to prevent segfaults in Python 3.10."""
+    # Based on the `save_code` from dill-0.3.6.
+    # https://github.com/uqfoundation/dill/blob/d5c4dccbe19fb27bfd757cb60abd2899fd9e59ba/dill/_dill.py#L1105
+    # Author: Mike McKerns (mmckerns @caltech and @uqfoundation)
+    # Copyright (c) 2008-2015 California Institute of Technology.
+    # Copyright (c) 2016-2023 The Uncertainty Quantification Foundation.
+    # License: 3-clause BSD.  The full license text is available at:
+    #  - https://github.com/uqfoundation/dill/blob/master/LICENSE
 
-  # The following function is also based on 'save_codeobject' from 'cloudpickle'
-  # Copyright (c) 2012, Regents of the University of California.
-  # Copyright (c) 2009 `PiCloud, Inc. <http://www.picloud.com>`_.
-  # License: 3-clause BSD.  The full license text is available at:
-  #  - https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE
-  @dill.register(types.CodeType)
-  def save_code(pickler, obj):  # pylint: disable=unused-variable
-    if hasattr(obj, 'co_endlinetable'):  # python 3.11a (20 args)
-      args = (
-          obj.co_argcount,
-          obj.co_posonlyargcount,
-          obj.co_kwonlyargcount,
-          obj.co_nlocals,
-          obj.co_stacksize,
-          obj.co_flags,
-          obj.co_code,
-          obj.co_consts,
-          obj.co_names,
-          obj.co_varnames,
-          obj.co_filename,
-          obj.co_name,
-          obj.co_qualname,
-          obj.co_firstlineno,
-          obj.co_linetable,
-          obj.co_endlinetable,
-          obj.co_columntable,
-          obj.co_exceptiontable,
-          obj.co_freevars,
-          obj.co_cellvars,
-      )
-    elif hasattr(obj, 'co_exceptiontable'):  # python 3.11 (18 args)
-      args = (
-          obj.co_argcount,
-          obj.co_posonlyargcount,
-          obj.co_kwonlyargcount,
-          obj.co_nlocals,
-          obj.co_stacksize,
-          obj.co_flags,
-          obj.co_code,
-          obj.co_consts,
-          obj.co_names,
-          obj.co_varnames,
-          obj.co_filename,
-          obj.co_name,
-          obj.co_qualname,
-          obj.co_firstlineno,
-          obj.co_linetable,
-          obj.co_exceptiontable,
-          obj.co_freevars,
-          obj.co_cellvars,
-      )
-    elif hasattr(obj, 'co_linetable'):  # python 3.10 (16 args)
-      args = (
-          obj.co_argcount,
-          obj.co_posonlyargcount,
-          obj.co_kwonlyargcount,
-          obj.co_nlocals,
-          obj.co_stacksize,
-          obj.co_flags,
-          obj.co_code,
-          obj.co_consts,
-          obj.co_names,
-          obj.co_varnames,
-          obj.co_filename,
-          obj.co_name,
-          obj.co_firstlineno,
-          obj.co_linetable,
-          obj.co_freevars,
-          obj.co_cellvars,
-      )
-    elif hasattr(obj, 'co_posonlyargcount'):  # python 3.8 (16 args)
-      args = (
-          obj.co_argcount,
-          obj.co_posonlyargcount,
-          obj.co_kwonlyargcount,
-          obj.co_nlocals,
-          obj.co_stacksize,
-          obj.co_flags,
-          obj.co_code,
-          obj.co_consts,
-          obj.co_names,
-          obj.co_varnames,
-          obj.co_filename,
-          obj.co_name,
-          obj.co_firstlineno,
-          obj.co_lnotab,
-          obj.co_freevars,
-          obj.co_cellvars,
-      )
-    else:  # python 3.7 (15 args)
-      args = (
-          obj.co_argcount,
-          obj.co_kwonlyargcount,
-          obj.co_nlocals,
-          obj.co_stacksize,
-          obj.co_flags,
-          obj.co_code,
-          obj.co_consts,
-          obj.co_names,
-          obj.co_varnames,
-          obj.co_filename,
-          obj.co_name,
-          obj.co_firstlineno,
-          obj.co_lnotab,
-          obj.co_freevars,
-          obj.co_cellvars,
-      )
+    # The following function is also based on 'save_codeobject' from 'cloudpickle'
+    # Copyright (c) 2012, Regents of the University of California.
+    # Copyright (c) 2009 `PiCloud, Inc. <http://www.picloud.com>`_.
+    # License: 3-clause BSD.  The full license text is available at:
+    #  - https://github.com/cloudpipe/cloudpickle/blob/master/LICENSE
+    @dill.register(types.CodeType)
+    def save_code(pickler, obj):  # pylint: disable=unused-variable
+        if hasattr(obj, "co_endlinetable"):  # python 3.11a (20 args)
+            args = (
+                obj.co_argcount,
+                obj.co_posonlyargcount,
+                obj.co_kwonlyargcount,
+                obj.co_nlocals,
+                obj.co_stacksize,
+                obj.co_flags,
+                obj.co_code,
+                obj.co_consts,
+                obj.co_names,
+                obj.co_varnames,
+                obj.co_filename,
+                obj.co_name,
+                obj.co_qualname,
+                obj.co_firstlineno,
+                obj.co_linetable,
+                obj.co_endlinetable,
+                obj.co_columntable,
+                obj.co_exceptiontable,
+                obj.co_freevars,
+                obj.co_cellvars,
+            )
+        elif hasattr(obj, "co_exceptiontable"):  # python 3.11 (18 args)
+            args = (
+                obj.co_argcount,
+                obj.co_posonlyargcount,
+                obj.co_kwonlyargcount,
+                obj.co_nlocals,
+                obj.co_stacksize,
+                obj.co_flags,
+                obj.co_code,
+                obj.co_consts,
+                obj.co_names,
+                obj.co_varnames,
+                obj.co_filename,
+                obj.co_name,
+                obj.co_qualname,
+                obj.co_firstlineno,
+                obj.co_linetable,
+                obj.co_exceptiontable,
+                obj.co_freevars,
+                obj.co_cellvars,
+            )
+        elif hasattr(obj, "co_linetable"):  # python 3.10 (16 args)
+            args = (
+                obj.co_argcount,
+                obj.co_posonlyargcount,
+                obj.co_kwonlyargcount,
+                obj.co_nlocals,
+                obj.co_stacksize,
+                obj.co_flags,
+                obj.co_code,
+                obj.co_consts,
+                obj.co_names,
+                obj.co_varnames,
+                obj.co_filename,
+                obj.co_name,
+                obj.co_firstlineno,
+                obj.co_linetable,
+                obj.co_freevars,
+                obj.co_cellvars,
+            )
+        elif hasattr(obj, "co_posonlyargcount"):  # python 3.8 (16 args)
+            args = (
+                obj.co_argcount,
+                obj.co_posonlyargcount,
+                obj.co_kwonlyargcount,
+                obj.co_nlocals,
+                obj.co_stacksize,
+                obj.co_flags,
+                obj.co_code,
+                obj.co_consts,
+                obj.co_names,
+                obj.co_varnames,
+                obj.co_filename,
+                obj.co_name,
+                obj.co_firstlineno,
+                obj.co_lnotab,
+                obj.co_freevars,
+                obj.co_cellvars,
+            )
+        else:  # python 3.7 (15 args)
+            args = (
+                obj.co_argcount,
+                obj.co_kwonlyargcount,
+                obj.co_nlocals,
+                obj.co_stacksize,
+                obj.co_flags,
+                obj.co_code,
+                obj.co_consts,
+                obj.co_names,
+                obj.co_varnames,
+                obj.co_filename,
+                obj.co_name,
+                obj.co_firstlineno,
+                obj.co_lnotab,
+                obj.co_freevars,
+                obj.co_cellvars,
+            )
 
-    pickler.save_reduce(types.CodeType, args, obj=obj)
+        pickler.save_reduce(types.CodeType, args, obj=obj)
diff --git a/tfx_bsl/beam/run_inference.py b/tfx_bsl/beam/run_inference.py
index 8c5d42b0..a95fdee3 100644
--- a/tfx_bsl/beam/run_inference.py
+++ b/tfx_bsl/beam/run_inference.py
@@ -15,46 +15,65 @@
 
 import abc
 import base64
-from concurrent import futures
 import functools
 import importlib
 import os
-from typing import Any, Callable, Dict, Iterable, List, Mapping, NamedTuple, Optional, Sequence, Text, Tuple, TypeVar, Union
+from concurrent import futures
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    Mapping,
+    NamedTuple,
+    Optional,
+    Sequence,
+    Text,
+    Tuple,
+    TypeVar,
+    Union,
+)
 
-from absl import logging
 import apache_beam as beam
+import googleapiclient
+import numpy as np
+import tensorflow as tf
+from absl import logging
 from apache_beam.ml.inference import base
 from apache_beam.options.pipeline_options import GoogleCloudOptions
 from apache_beam.transforms import resources
 from apache_beam.utils import retry
-import googleapiclient
-from googleapiclient import discovery
-from googleapiclient import http
-import numpy as np
-import tensorflow as tf
-from tfx_bsl.public.proto import model_spec_pb2
-from tfx_bsl.telemetry import util
+from googleapiclient import discovery, http
 
 # TODO(b/140306674): stop using the internal TF API.
-from tensorflow.python.saved_model import loader_impl  # pylint: disable=g-direct-tensorflow-import
-from tensorflow_serving.apis import classification_pb2
-from tensorflow_serving.apis import prediction_log_pb2
-from tensorflow_serving.apis import regression_pb2
+from tensorflow.python.saved_model import (
+    loader_impl,  # pylint: disable=g-direct-tensorflow-import
+)
+from tensorflow_serving.apis import (
+    classification_pb2,
+    prediction_log_pb2,
+    regression_pb2,
+)
 
+from tfx_bsl.public.proto import model_spec_pb2
+from tfx_bsl.telemetry import util
 
 # TODO(b/131873699): Remove once 1.x support is dropped.
 try:
-  # pylint: disable=g-import-not-at-top
-  # We need to import this in order to register all quantiles ops, even though
-  # it's not directly used.
-  from tensorflow.contrib.boosted_trees.python.ops import quantile_ops as _  # pylint: disable=unused-import
+    # pylint: disable=g-import-not-at-top
+    # We need to import this in order to register all quantiles ops, even though
+    # it's not directly used.
+    from tensorflow.contrib.boosted_trees.python.ops import (
+        quantile_ops as _,  # pylint: disable=unused-import
+    )
 except ImportError:
-  pass
+    pass
 
 
-_METRICS_DESCRIPTOR_INFERENCE = 'BulkInferrer'
-_METRICS_DESCRIPTOR_IN_PROCESS = 'InProcess'
-_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = 'CloudAIPlatformPrediction'
+_METRICS_DESCRIPTOR_INFERENCE = "BulkInferrer"
+_METRICS_DESCRIPTOR_IN_PROCESS = "InProcess"
+_METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION = "CloudAIPlatformPrediction"
 _REMOTE_INFERENCE_NUM_RETRIES = 5
 
 # We define the following aliases of Any because the actual types are not
@@ -65,1131 +84,1275 @@
 
 
 # TODO(b/151468119): Converts this into enum once we stop supporting Python 2.7
-class _OperationType(object):
-  CLASSIFICATION = 'CLASSIFICATION'
-  REGRESSION = 'REGRESSION'
-  MULTI_INFERENCE = 'MULTI_INFERENCE'
-  PREDICTION = 'PREDICTION'
+class _OperationType:
+    CLASSIFICATION = "CLASSIFICATION"
+    REGRESSION = "REGRESSION"
+    MULTI_INFERENCE = "MULTI_INFERENCE"
+    PREDICTION = "PREDICTION"
 
 
-_K = TypeVar('_K')
+_K = TypeVar("_K")
 InputType = Union[tf.train.Example, tf.train.SequenceExample, bytes]
 LoadOverrideFnType = Callable[[str, Sequence[str]], Any]
 _OUTPUT_TYPE = prediction_log_pb2.PredictionLog
 
 
 def _is_list_type(input_type: beam.typehints.typehints.TypeConstraint) -> bool:
-  if hasattr(input_type, 'inner_type'):
-    return input_type == beam.typehints.List[input_type.inner_type]
-  return False
+    if hasattr(input_type, "inner_type"):
+        return input_type == beam.typehints.List[input_type.inner_type]
+    return False
 
 
 def _key_and_result_type(input_type: beam.typehints.typehints.TypeConstraint):
-  """Get typehints for key and result type given an input typehint."""
-  tuple_types = getattr(input_type, 'tuple_types', None)
-  if tuple_types is not None and len(tuple_types) == 2:
-    key_type = tuple_types[0]
-    value_type = tuple_types[1]
-  else:
-    key_type = None
-    value_type = input_type
-  if _is_list_type(value_type):
-    result_type = beam.typehints.List[_OUTPUT_TYPE]
-  else:
-    result_type = _OUTPUT_TYPE
-  return key_type, result_type
+    """Get typehints for key and result type given an input typehint."""
+    tuple_types = getattr(input_type, "tuple_types", None)
+    if tuple_types is not None and len(tuple_types) == 2:
+        key_type = tuple_types[0]
+        value_type = tuple_types[1]
+    else:
+        key_type = None
+        value_type = input_type
+    if _is_list_type(value_type):
+        result_type = beam.typehints.List[_OUTPUT_TYPE]
+    else:
+        result_type = _OUTPUT_TYPE
+    return key_type, result_type
 
 
 def _using_in_process_inference(
-    inference_spec_type: model_spec_pb2.InferenceSpecType) -> bool:
-  return inference_spec_type.WhichOneof('type') == 'saved_model_spec'
+    inference_spec_type: model_spec_pb2.InferenceSpecType,
+) -> bool:
+    return inference_spec_type.WhichOneof("type") == "saved_model_spec"
 
 
 def create_model_handler(
     inference_spec_type: model_spec_pb2.InferenceSpecType,
     load_override_fn: Optional[LoadOverrideFnType],
-    options_project_id: Optional[str]) -> base.ModelHandler:
-  """Creates a ModelHandler based on the InferenceSpecType.
+    options_project_id: Optional[str],
+) -> base.ModelHandler:
+    """Creates a ModelHandler based on the InferenceSpecType.
 
-  Args:
-    inference_spec_type: Model inference endpoint.
-    load_override_fn: An option function to load the model, only used with
-      saved models.
-    options_project_id: The project id from pipeline options, only used if
-      there was no project_id specified in the inference_spec_type proto.
-
-  Returns:
-    A ModelHandler appropriate for the inference_spec_type.
-  """
-  if _using_in_process_inference(inference_spec_type):
-    return _get_saved_model_handler(inference_spec_type, load_override_fn)
-  return _RemotePredictModelHandler(inference_spec_type, options_project_id)
+    Args:
+    ----
+      inference_spec_type: Model inference endpoint.
+      load_override_fn: An option function to load the model, only used with
+        saved models.
+      options_project_id: The project id from pipeline options, only used if
+        there was no project_id specified in the inference_spec_type proto.
+
+    Returns:
+    -------
+      A ModelHandler appropriate for the inference_spec_type.
+    """
+    if _using_in_process_inference(inference_spec_type):
+        return _get_saved_model_handler(inference_spec_type, load_override_fn)
+    return _RemotePredictModelHandler(inference_spec_type, options_project_id)
 
 
 # Output type is inferred from input.
-@beam.typehints.with_input_types(Union[InputType, Tuple[_K, InputType],
-                                       Tuple[_K, List[InputType]]])
+@beam.typehints.with_input_types(
+    Union[InputType, Tuple[_K, InputType], Tuple[_K, List[InputType]]]
+)
 class RunInferenceImpl(beam.PTransform):
-  """Implementation of RunInference API."""
-
-  def __init__(self,
-               inference_spec_type: model_spec_pb2.InferenceSpecType,
-               load_override_fn: Optional[LoadOverrideFnType] = None):
-    """Initializes transform.
-
-    Args:
-      inference_spec_type: InferenceSpecType proto.
-      load_override_fn: If provided, overrides the model loader fn of the
-        underlying ModelHandler. This takes a model path and sequence of tags,
-        and should return a model with interface compatible with tf.SavedModel.
-    """
-    self._inference_spec_type = inference_spec_type
-    self._load_override_fn = load_override_fn
-
-  # LINT.IfChange(close_to_resources)
-  @staticmethod
-  def _model_size_bytes(path: str) -> int:
-    # We might be unable to compute the size of the model during pipeline
-    # construction, but the model might still be accessible during pipeline
-    # execution. In such cases we will provide a default value for the model
-    # size. In general, it is a lot more costly to underestimate the size of
-    # the model than to overestimate it.
-    default_model_size = 1 << 30  # 1 GB.
-
-    def file_size(directory, file):
-      return max(tf.io.gfile.stat(os.path.join(directory, file)).length, 0)
-
-    try:
-      result = 0
-      with futures.ThreadPoolExecutor() as executor:
-        for directory, _, files in tf.io.gfile.walk(path):
-          result += sum(
-              executor.map(functools.partial(file_size, directory), files))
-      if result == 0:
-        result = default_model_size
-      return result
-    except OSError:
-      return default_model_size
-
-  @staticmethod
-  def _make_close_to_resources(
-      inference_spec_type: model_spec_pb2.InferenceSpecType) -> str:
-    """Proximity resources not otherwise known (or visible) to Beam."""
-
-    if _using_in_process_inference(inference_spec_type):
-      # The model is expected to be loaded once per worker (as opposed to
-      # once per thread), due to the use of beam.Shared in pertinent DoFns.
-      #
-      # The exact value of this constant is not important; it aims to signify
-      # that there might be a non-trivial number of model loads.
-      #
-      # TODO(katsiapis): Auto(tune) this.
-      estimated_num_workers = 100
-      model_path = inference_spec_type.saved_model_spec.model_path
-      model_size_bytes = RunInferenceImpl._model_size_bytes(model_path)
-      return f'{model_path}[{model_size_bytes * estimated_num_workers}]'
-    else:
-      # The model is available remotely, so the size of the RPC traffic is
-      # proportional to the size of the input.
-      #
-      # The exact value of this constant is not important; it aims to signify
-      # that there might be a non-trivial amount of RPC traffic.
-      #
-      # TODO(katsiapis): Auto(tune) this.
-      estimated_rpc_traffic_size_bytes = 1 << 40  # 1 TB.
-
-      # TODO(katsiapis): Is it possible to query the AI platform to see what
-      # zones the model is available in, so that we can instead provide a
-      # descriptor along the lines of: f'zone1|zone2|...|zoneN[size]'?
-      del estimated_rpc_traffic_size_bytes
-      return ''
-  # LINT.ThenChange(../../../../learning/serving/contrib/servables/tensorflow/flume/bulk-inference.cc:close_to_resources)
-
-  def infer_output_type(self, input_type):
-    key_type, result_type = _key_and_result_type(input_type)
-    if key_type is not None:
-      return beam.typehints.Tuple[key_type, result_type]
-    return result_type
-
-  def expand(self, examples: beam.PCollection) -> beam.PCollection:
-    logging.info('RunInference on model: %s', self._inference_spec_type)
-    output_type = self.infer_output_type(examples.element_type)
-    # TODO(b/217271822): Do this unconditionally after BEAM-13690 is resolved.
-    if resources.ResourceHint.is_registered('close_to_resources'):
-      examples |= (
-          'CloseToResources' >> beam.Map(lambda x: x).with_resource_hints(
-              close_to_resources=self._make_close_to_resources(
-                  self._inference_spec_type)))
-    handler = create_model_handler(
-        self._inference_spec_type, self._load_override_fn,
-        examples.pipeline.options.view_as(GoogleCloudOptions).project)
-    handler = _ModelHandlerWrapper(handler)
-    return examples | 'BulkInference' >> base.RunInference(
-        handler).with_output_types(output_type)
+    """Implementation of RunInference API."""
+
+    def __init__(
+        self,
+        inference_spec_type: model_spec_pb2.InferenceSpecType,
+        load_override_fn: Optional[LoadOverrideFnType] = None,
+    ):
+        """Initializes transform.
+
+        Args:
+        ----
+          inference_spec_type: InferenceSpecType proto.
+          load_override_fn: If provided, overrides the model loader fn of the
+            underlying ModelHandler. This takes a model path and sequence of tags,
+            and should return a model with interface compatible with tf.SavedModel.
+        """
+        self._inference_spec_type = inference_spec_type
+        self._load_override_fn = load_override_fn
+
+    # LINT.IfChange(close_to_resources)
+    @staticmethod
+    def _model_size_bytes(path: str) -> int:
+        # We might be unable to compute the size of the model during pipeline
+        # construction, but the model might still be accessible during pipeline
+        # execution. In such cases we will provide a default value for the model
+        # size. In general, it is a lot more costly to underestimate the size of
+        # the model than to overestimate it.
+        default_model_size = 1 << 30  # 1 GB.
+
+        def file_size(directory, file):
+            return max(tf.io.gfile.stat(os.path.join(directory, file)).length, 0)
+
+        try:
+            result = 0
+            with futures.ThreadPoolExecutor() as executor:
+                for directory, _, files in tf.io.gfile.walk(path):
+                    result += sum(
+                        executor.map(functools.partial(file_size, directory), files)
+                    )
+            if result == 0:
+                result = default_model_size
+            return result
+        except OSError:
+            return default_model_size
+
+    @staticmethod
+    def _make_close_to_resources(
+        inference_spec_type: model_spec_pb2.InferenceSpecType,
+    ) -> str:
+        """Proximity resources not otherwise known (or visible) to Beam."""
+        if _using_in_process_inference(inference_spec_type):
+            # The model is expected to be loaded once per worker (as opposed to
+            # once per thread), due to the use of beam.Shared in pertinent DoFns.
+            #
+            # The exact value of this constant is not important; it aims to signify
+            # that there might be a non-trivial number of model loads.
+            #
+            # TODO(katsiapis): Auto(tune) this.
+            estimated_num_workers = 100
+            model_path = inference_spec_type.saved_model_spec.model_path
+            model_size_bytes = RunInferenceImpl._model_size_bytes(model_path)
+            return f"{model_path}[{model_size_bytes * estimated_num_workers}]"
+        else:
+            # The model is available remotely, so the size of the RPC traffic is
+            # proportional to the size of the input.
+            #
+            # The exact value of this constant is not important; it aims to signify
+            # that there might be a non-trivial amount of RPC traffic.
+            #
+            # TODO(katsiapis): Auto(tune) this.
+            estimated_rpc_traffic_size_bytes = 1 << 40  # 1 TB.
+
+            # TODO(katsiapis): Is it possible to query the AI platform to see what
+            # zones the model is available in, so that we can instead provide a
+            # descriptor along the lines of: f'zone1|zone2|...|zoneN[size]'?
+            del estimated_rpc_traffic_size_bytes
+            return ""
+
+    # LINT.ThenChange(../../../../learning/serving/contrib/servables/tensorflow/flume/bulk-inference.cc:close_to_resources)
+
+    def infer_output_type(self, input_type):
+        key_type, result_type = _key_and_result_type(input_type)
+        if key_type is not None:
+            return beam.typehints.Tuple[key_type, result_type]
+        return result_type
+
+    def expand(self, examples: beam.PCollection) -> beam.PCollection:
+        logging.info("RunInference on model: %s", self._inference_spec_type)
+        output_type = self.infer_output_type(examples.element_type)
+        # TODO(b/217271822): Do this unconditionally after BEAM-13690 is resolved.
+        if resources.ResourceHint.is_registered("close_to_resources"):
+            examples |= "CloseToResources" >> beam.Map(lambda x: x).with_resource_hints(
+                close_to_resources=self._make_close_to_resources(
+                    self._inference_spec_type
+                )
+            )
+        handler = create_model_handler(
+            self._inference_spec_type,
+            self._load_override_fn,
+            examples.pipeline.options.view_as(GoogleCloudOptions).project,
+        )
+        handler = _ModelHandlerWrapper(handler)
+        return examples | "BulkInference" >> base.RunInference(
+            handler
+        ).with_output_types(output_type)
 
 
 def _get_saved_model_handler(
     inference_spec_type: model_spec_pb2.InferenceSpecType,
-    load_override_fn: Optional[LoadOverrideFnType]) -> base.ModelHandler:
-  """Get an in-process ModelHandler."""
-  operation_type = _get_operation_type(inference_spec_type)
-  if operation_type == _OperationType.CLASSIFICATION:
-    return _ClassifyModelHandler(inference_spec_type, load_override_fn)
-  elif operation_type == _OperationType.REGRESSION:
-    return _RegressModelHandler(inference_spec_type, load_override_fn)
-  elif operation_type == _OperationType.MULTI_INFERENCE:
-    return _MultiInferenceModelHandler(inference_spec_type, load_override_fn)
-  elif operation_type == _OperationType.PREDICTION:
-    return _PredictModelHandler(inference_spec_type, load_override_fn)
-  else:
-    raise ValueError('Unsupported operation_type %s' % operation_type)
+    load_override_fn: Optional[LoadOverrideFnType],
+) -> base.ModelHandler:
+    """Get an in-process ModelHandler."""
+    operation_type = _get_operation_type(inference_spec_type)
+    if operation_type == _OperationType.CLASSIFICATION:
+        return _ClassifyModelHandler(inference_spec_type, load_override_fn)
+    elif operation_type == _OperationType.REGRESSION:
+        return _RegressModelHandler(inference_spec_type, load_override_fn)
+    elif operation_type == _OperationType.MULTI_INFERENCE:
+        return _MultiInferenceModelHandler(inference_spec_type, load_override_fn)
+    elif operation_type == _OperationType.PREDICTION:
+        return _PredictModelHandler(inference_spec_type, load_override_fn)
+    else:
+        raise ValueError("Unsupported operation_type %s" % operation_type)
 
 
 # Output type is inferred from input.
-@beam.typehints.with_input_types(Union[InputType, Tuple[_K, InputType],
-                                       Tuple[_K, List[InputType]]])
+@beam.typehints.with_input_types(
+    Union[InputType, Tuple[_K, InputType], Tuple[_K, List[InputType]]]
+)
 class RunInferencePerModelImpl(beam.PTransform):
-  """Implementation of the vectorized variant of the RunInference API."""
-
-  def __init__(self,
-               inference_spec_types: Iterable[model_spec_pb2.InferenceSpecType],
-               load_override_fn: Optional[LoadOverrideFnType] = None):
-    """Initializes transform.
-
-    Args:
-      inference_spec_types: InferenceSpecType proto.
-      load_override_fn: If provided, overrides the model loader fn of the
-        underlying ModelHandler. This takes a model path and sequence of tags,
-        and should return a model with interface compatible with tf.SavedModel.
-    """
-    self._inference_spec_types = tuple(inference_spec_types)
-    self._load_override_fn = load_override_fn
-
-  def infer_output_type(self, input_type):
-    key_type, result_type = _key_and_result_type(input_type)
-    result_type = beam.typehints.Tuple[(result_type,) *
-                                       len(self._inference_spec_types)]
-    if key_type is not None:
-      return beam.typehints.Tuple[key_type, result_type]
-    return result_type
-
-  def expand(self, examples: beam.PCollection) -> beam.PCollection:
-    output_type = self.infer_output_type(examples.element_type)
-
-    # TODO(b/217442215): Obviate the need for this block (and instead rely
-    # solely on the one within RunInferenceImpl::expand).
-    # TODO(b/217271822): Do this unconditionally after BEAM-13690 is resolved.
-    if resources.ResourceHint.is_registered('close_to_resources'):
-      examples |= (
-          'CloseToResources' >> beam.Map(lambda x: x).with_resource_hints(
-              close_to_resources=','.join([
-                  RunInferenceImpl._make_close_to_resources(s)  # pylint: disable=protected-access
-                  for s in self._inference_spec_types
-              ])))
-
-    tuple_types = getattr(examples.element_type, 'tuple_types', None)
-    if tuple_types is None or len(tuple_types) != 2:
-      # The input is not a KV, so pair with a dummy key, run the inferences, and
-      # drop the dummy key afterwards.
-      return (examples
-              | 'PairWithNone' >> beam.Map(lambda x: (None, x))
-              | 'ApplyOnKeyedInput' >> RunInferencePerModelImpl(
-                  self._inference_spec_types)
-              | 'DropNone' >> beam.Values().with_output_types(output_type))
-
-    def infer_iteration_output_type(input_type):
-      """Infers ouput typehint for Iteration Ptransform based on input_type."""
-      tuple_types = getattr(input_type, 'tuple_types', None)
-      output_tuple_components = []
-      if tuple_types is not None:
-        output_tuple_components.extend(tuple_types)
-        example_type = tuple_types[1]
-      else:
-        output_tuple_components.append(input_type)
-        example_type = input_type
-
-      if _is_list_type(example_type):
-        inference_result_type = beam.typehints.List[_OUTPUT_TYPE]
-      else:
-        inference_result_type = _OUTPUT_TYPE
-      output_tuple_components.append(inference_result_type)
-      return beam.typehints.Tuple[output_tuple_components]
-
-    @beam.ptransform_fn
-    def Iteration(pcoll, inference_spec_type):  # pylint: disable=invalid-name
-      return (pcoll
-              | 'PairWithInput' >> beam.Map(lambda x: (x, x[1]))
-              | 'RunInferenceImpl' >> RunInferenceImpl(inference_spec_type,
-                                                       self._load_override_fn)
-              | 'ExtendResults' >>
-              beam.MapTuple(lambda k, v: k + (v,)).with_output_types(
-                  infer_iteration_output_type(pcoll.element_type)))
-
-    result = examples
-    for i, inference_spec_type in enumerate(self._inference_spec_types):
-      result |= f'Model[{i}]' >> Iteration(inference_spec_type)  # pylint: disable=no-value-for-parameter
-    result |= 'ExtractResults' >> beam.Map(
-        lambda tup: (tup[0], tuple(tup[2:]))).with_output_types(output_type)
-    return result
-
-
-_IOTensorSpec = NamedTuple('_IOTensorSpec',
-                           [('input_tensor_alias', Text),
-                            ('input_tensor_name', Text),
-                            ('output_alias_tensor_names', Dict[Text, Text])])
-
-_Signature = NamedTuple('_Signature', [('name', Text),
-                                       ('signature_def', _SignatureDef)])
+    """Implementation of the vectorized variant of the RunInference API."""
+
+    def __init__(
+        self,
+        inference_spec_types: Iterable[model_spec_pb2.InferenceSpecType],
+        load_override_fn: Optional[LoadOverrideFnType] = None,
+    ):
+        """Initializes transform.
+
+        Args:
+        ----
+          inference_spec_types: InferenceSpecType proto.
+          load_override_fn: If provided, overrides the model loader fn of the
+            underlying ModelHandler. This takes a model path and sequence of tags,
+            and should return a model with interface compatible with tf.SavedModel.
+        """
+        self._inference_spec_types = tuple(inference_spec_types)
+        self._load_override_fn = load_override_fn
+
+    def infer_output_type(self, input_type):
+        key_type, result_type = _key_and_result_type(input_type)
+        result_type = beam.typehints.Tuple[
+            (result_type,) * len(self._inference_spec_types)
+        ]
+        if key_type is not None:
+            return beam.typehints.Tuple[key_type, result_type]
+        return result_type
+
+    def expand(self, examples: beam.PCollection) -> beam.PCollection:
+        output_type = self.infer_output_type(examples.element_type)
+
+        # TODO(b/217442215): Obviate the need for this block (and instead rely
+        # solely on the one within RunInferenceImpl::expand).
+        # TODO(b/217271822): Do this unconditionally after BEAM-13690 is resolved.
+        if resources.ResourceHint.is_registered("close_to_resources"):
+            examples |= "CloseToResources" >> beam.Map(lambda x: x).with_resource_hints(
+                close_to_resources=",".join(
+                    [
+                        RunInferenceImpl._make_close_to_resources(s)  # pylint: disable=protected-access
+                        for s in self._inference_spec_types
+                    ]
+                )
+            )
+
+        tuple_types = getattr(examples.element_type, "tuple_types", None)
+        if tuple_types is None or len(tuple_types) != 2:
+            # The input is not a KV, so pair with a dummy key, run the inferences, and
+            # drop the dummy key afterwards.
+            return (
+                examples
+                | "PairWithNone" >> beam.Map(lambda x: (None, x))
+                | "ApplyOnKeyedInput"
+                >> RunInferencePerModelImpl(self._inference_spec_types)
+                | "DropNone" >> beam.Values().with_output_types(output_type)
+            )
+
+        def infer_iteration_output_type(input_type):
+            """Infers ouput typehint for Iteration Ptransform based on input_type."""
+            tuple_types = getattr(input_type, "tuple_types", None)
+            output_tuple_components = []
+            if tuple_types is not None:
+                output_tuple_components.extend(tuple_types)
+                example_type = tuple_types[1]
+            else:
+                output_tuple_components.append(input_type)
+                example_type = input_type
+
+            if _is_list_type(example_type):
+                inference_result_type = beam.typehints.List[_OUTPUT_TYPE]
+            else:
+                inference_result_type = _OUTPUT_TYPE
+            output_tuple_components.append(inference_result_type)
+            return beam.typehints.Tuple[output_tuple_components]
+
+        @beam.ptransform_fn
+        def Iteration(pcoll, inference_spec_type):  # pylint: disable=invalid-name
+            return (
+                pcoll
+                | "PairWithInput" >> beam.Map(lambda x: (x, x[1]))
+                | "RunInferenceImpl"
+                >> RunInferenceImpl(inference_spec_type, self._load_override_fn)
+                | "ExtendResults"
+                >> beam.MapTuple(lambda k, v: k + (v,)).with_output_types(
+                    infer_iteration_output_type(pcoll.element_type)
+                )
+            )
+
+        result = examples
+        for i, inference_spec_type in enumerate(self._inference_spec_types):
+            result |= f"Model[{i}]" >> Iteration(inference_spec_type)  # pylint: disable=no-value-for-parameter
+        result |= "ExtractResults" >> beam.Map(
+            lambda tup: (tup[0], tuple(tup[2:]))
+        ).with_output_types(output_type)
+        return result
+
+
+class _IOTensorSpec(NamedTuple):
+    input_tensor_alias: str
+    input_tensor_name: str
+    output_alias_tensor_names: Dict[str, str]
+
+
+class _Signature(NamedTuple):
+    name: str
+    signature_def: _SignatureDef
 
 
 def _retry_on_unavailable_and_resource_error_filter(exception: Exception):
-  """Retries for HttpError.
-
-  Retries if error is unavailable (503) or resource exhausted (429).
-  Resource exhausted may happen when qps or bandwidth exceeds quota.
+    """Retries for HttpError.
 
-  Args:
-    exception: Exception from inference http request execution.
+    Retries if error is unavailable (503) or resource exhausted (429).
+    Resource exhausted may happen when qps or bandwidth exceeds quota.
 
-  Returns:
-    A boolean of whether retry.
-  """
+    Args:
+    ----
+      exception: Exception from inference http request execution.
 
-  return (isinstance(exception, googleapiclient.errors.HttpError) and
-          exception.resp.status in (503, 429))
+    Returns:
+    -------
+      A boolean of whether retry.
+    """
+    return isinstance(
+        exception, googleapiclient.errors.HttpError
+    ) and exception.resp.status in (503, 429)
 
 
 class _BaseModelHandler(base.ModelHandler, metaclass=abc.ABCMeta):
-  """A basic TFX implementation of ModelHandler."""
-
-  def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType):
-    super().__init__()
-    operation_type = _get_operation_type(inference_spec_type)
-    proximity_descriptor = (
-        _METRICS_DESCRIPTOR_IN_PROCESS
-        if _using_in_process_inference(inference_spec_type) else
-        _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION)
-    self._metrics_namespace = util.MakeTfxNamespace(
-        [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor])
-    self._batch_elements_kwargs = {}
-    for desc, val in inference_spec_type.batch_parameters.ListFields():
-      self._batch_elements_kwargs[desc.name] = val
-
-  def run_inference(
-      self,
-      examples: List[InputType],
-      model: Any,
-      inference_args=None) -> Iterable[prediction_log_pb2.PredictionLog]:
-    serialized_examples = [
-        e if isinstance(e, bytes) else e.SerializeToString() for e in examples
-    ]
-    self._check_examples(examples)
-    outputs = self._run_inference(examples, serialized_examples, model)
-    return self._post_process(examples, serialized_examples, outputs)
-
-  def _check_examples(self, examples):
-    pass
-
-  def get_num_bytes(
-      self, examples: Iterable[prediction_log_pb2.PredictionLog]) -> int:
-    serialized_examples = [
-        e if isinstance(e, bytes) else e.SerializeToString() for e in examples
-    ]
-    return sum(len(se) for se in serialized_examples)
+    """A basic TFX implementation of ModelHandler."""
+
+    def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType):
+        super().__init__()
+        operation_type = _get_operation_type(inference_spec_type)
+        proximity_descriptor = (
+            _METRICS_DESCRIPTOR_IN_PROCESS
+            if _using_in_process_inference(inference_spec_type)
+            else _METRICS_DESCRIPTOR_CLOUD_AI_PREDICTION
+        )
+        self._metrics_namespace = util.MakeTfxNamespace(
+            [_METRICS_DESCRIPTOR_INFERENCE, operation_type, proximity_descriptor]
+        )
+        self._batch_elements_kwargs = {}
+        for desc, val in inference_spec_type.batch_parameters.ListFields():
+            self._batch_elements_kwargs[desc.name] = val
+
+    def run_inference(
+        self, examples: List[InputType], model: Any, inference_args=None
+    ) -> Iterable[prediction_log_pb2.PredictionLog]:
+        serialized_examples = [
+            e if isinstance(e, bytes) else e.SerializeToString() for e in examples
+        ]
+        self._check_examples(examples)
+        outputs = self._run_inference(examples, serialized_examples, model)
+        return self._post_process(examples, serialized_examples, outputs)
+
+    def _check_examples(self, examples):
+        pass
+
+    def get_num_bytes(
+        self, examples: Iterable[prediction_log_pb2.PredictionLog]
+    ) -> int:
+        serialized_examples = [
+            e if isinstance(e, bytes) else e.SerializeToString() for e in examples
+        ]
+        return sum(len(se) for se in serialized_examples)
+
+    def get_metrics_namespace(self):
+        return self._metrics_namespace
+
+    def batch_elements_kwargs(self) -> Mapping[str, Any]:
+        return self._batch_elements_kwargs
+
+    @abc.abstractmethod
+    def _post_process(
+        self,
+        examples: List[InputType],
+        serialized_examples: List[bytes],
+        outputs: List[Mapping[str, Union[np.ndarray, Any]]],
+    ) -> List[prediction_log_pb2.PredictionLog]:
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def _run_inference(
+        self, examples: List[InputType], serialized_examples: List[bytes], model
+    ) -> List[Mapping[str, Any]]:
+        raise NotImplementedError
 
-  def get_metrics_namespace(self):
-    return self._metrics_namespace
 
-  def batch_elements_kwargs(self) -> Mapping[str, Any]:
-    return self._batch_elements_kwargs
+# TODO(b/151468119): Consider to re-batch with online serving request size
+# limit, and re-batch with RPC failures(InvalidArgument) regarding request size.
+class _RemotePredictModelHandler(_BaseModelHandler):
+    """Performs predictions from a cloud-hosted TensorFlow model.
 
-  @abc.abstractmethod
-  def _post_process(
-      self, examples: List[InputType], serialized_examples: List[bytes],
-      outputs: List[Mapping[Text, Union[np.ndarray, Any]]]
-  ) -> List[prediction_log_pb2.PredictionLog]:
-    raise NotImplementedError
+    Supports both batch and streaming processing modes.
+    NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885].
 
-  @abc.abstractmethod
-  def _run_inference(self, examples: List[InputType],
-                     serialized_examples: List[bytes],
-                     model) -> List[Mapping[Text, Any]]:
-    raise NotImplementedError
+    In order to request predictions, you must deploy your trained model to AI
+    Platform Prediction in the TensorFlow SavedModel format. See
+    [Exporting a SavedModel for prediction]
+    (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction)
+    for more details.
 
+    To send binary data, you have to make sure that the name of an input ends in
+    `_bytes`.
 
-# TODO(b/151468119): Consider to re-batch with online serving request size
-# limit, and re-batch with RPC failures(InvalidArgument) regarding request size.
-class _RemotePredictModelHandler(_BaseModelHandler):
-  """Performs predictions from a cloud-hosted TensorFlow model.
-
-  Supports both batch and streaming processing modes.
-  NOTE: Does not work on DirectRunner for streaming jobs [BEAM-7885].
-
-  In order to request predictions, you must deploy your trained model to AI
-  Platform Prediction in the TensorFlow SavedModel format. See
-  [Exporting a SavedModel for prediction]
-  (https://cloud.google.com/ai-platform/prediction/docs/exporting-savedmodel-for-prediction)
-  for more details.
-
-  To send binary data, you have to make sure that the name of an input ends in
-  `_bytes`.
-
-  NOTE: The returned `PredictLog` instances do not have `PredictRequest` part
-  filled. The reason is that it is difficult to determine the input tensor name
-  without having access to cloud-hosted model's signatures.
-  """
-
-  def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType,
-               pipeline_options_project_id: Optional[str]):
-    super().__init__(inference_spec_type)
-    self._ai_platform_prediction_model_spec = (
-        inference_spec_type.ai_platform_prediction_model_spec)
-    self._api_client = None
-    project_id = (
-        inference_spec_type.ai_platform_prediction_model_spec.project_id or
-        pipeline_options_project_id)
-    if not project_id:
-      raise ValueError('Either a non-empty project id or project flag in '
-                       ' beam pipeline options needs be provided.')
-
-    model_name = (
-        inference_spec_type.ai_platform_prediction_model_spec.model_name)
-    if not model_name:
-      raise ValueError('A non-empty model name must be provided.')
-
-    version_name = (
-        inference_spec_type.ai_platform_prediction_model_spec.version_name)
-    name_spec = 'projects/{}/models/{}'
-    # If version is not specified, the default version for a model is used.
-    if version_name:
-      name_spec += '/versions/{}'
-    self._full_model_name = name_spec.format(project_id, model_name,
-                                             version_name)
-
-  # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff.
-  @retry.with_exponential_backoff(
-      initial_delay_secs=1.0,
-      num_retries=_REMOTE_INFERENCE_NUM_RETRIES,
-      retry_filter=_retry_on_unavailable_and_resource_error_filter)
-  def _execute_request(
-      self,
-      request: http.HttpRequest) -> Mapping[Text, Sequence[Mapping[Text, Any]]]:
-    result = request.execute()
-    if 'error' in result:
-      raise ValueError(result['error'])
-    return result
+    NOTE: The returned `PredictLog` instances do not have `PredictRequest` part
+    filled. The reason is that it is difficult to determine the input tensor name
+    without having access to cloud-hosted model's signatures.
+    """
 
-  def _make_instances(
-      self,
-      examples: List[Union[tf.train.Example, tf.train.SequenceExample]],
-      serialized_examples: List[bytes]
-      )-> List[Mapping[Text, Any]]:
-    if self._ai_platform_prediction_model_spec.use_serialization_config:
-      return [{'b64': base64.b64encode(se).decode()}
-              for se in serialized_examples]
-    else:
-      result = []
-      for example in examples:
-        instance = {}
-        for name, feature in example.features.feature.items():
-          attribute_kind = feature.WhichOneof('kind')
-          if attribute_kind is None:
-            continue
-          values = self._make_values(name, feature, attribute_kind)
-          instance[name] = values[0] if len(values) == 1 else values
-        result.append(instance)
-      return result
-
-  @staticmethod
-  def _make_values(name: Text, feature: Any, attribute_kind: Text) -> List[Any]:
-    values = getattr(feature, attribute_kind).value
-    if name.endswith('_bytes'):
-      return [{'b64': base64.b64encode(x).decode()} for x in values]
-    elif attribute_kind == 'bytes_list':
-      return [x.decode() for x in values]
-    else:
-      # Converts proto RepeatedScalarContainer to list so it is
-      # JSON-serializable.
-      return list(values)
-
-  def load_model(self):
-    # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to
-    # user agent once custom header is supported in googleapiclient.
-    self._api_client = discovery.build('ml', 'v1')
-    # load_model returns a locally hosted model. Since all these inferences
-    # are run on vertexAI, no local model is present.
-    return None
-
-  def _check_examples(self, examples: List[InputType]):
-    # TODO(b/131873699): Add support for tf.train.SequenceExample even when
-    # use_serialization_config is not enabled (by appropriately modifying
-    # _make_instances).
-    allowed_types = (
-        (tf.train.Example, tf.train.SequenceExample, bytes)
-        if self._ai_platform_prediction_model_spec.use_serialization_config
-        else tf.train.Example)
-    if not all(isinstance(e, allowed_types) for e in examples):
-      raise NotImplementedError(
-          'RemotePredict supports raw and serialized tf.train.Example, raw and '
-          'serialized tf.SequenceExample and raw bytes (the '
-          'latter three only when use_serialization_config is true)')
-
-  def _run_inference(self, examples: List[InputType],
-                     serialized_examples: List[bytes],
-                     model) -> List[Mapping[Text, Any]]:
-    self._check_examples(examples)
-    body = {'instances': self._make_instances(examples, serialized_examples)}
-    if self._api_client is None:
-      raise ValueError(
-          'API client is not initialized. Call load_model() first.'
-      )
-    request = self._api_client.projects().predict(
-        name=self._full_model_name, body=body)
-    response = self._execute_request(request)
-    return response['predictions']
-
-  def _post_process(
-      self, examples: List[InputType], serialized_examples: List[bytes],
-      outputs: List[Mapping[Text,
-                            Any]]) -> List[prediction_log_pb2.PredictionLog]:
-    del examples
-    result = []
-    for i, serialized_example in enumerate(serialized_examples):
-      prediction_log = prediction_log_pb2.PredictionLog()
-      predict_log = prediction_log.predict_log
-      input_tensor_proto = predict_log.request.inputs[
-          tf.saved_model.PREDICT_INPUTS]
-      input_tensor_proto.dtype = tf.string.as_datatype_enum
-      input_tensor_proto.tensor_shape.dim.add().size = 1
-      input_tensor_proto.string_val.append(serialized_example)
-      for output_alias, values in outputs[i].items():
-        values = np.array(values)
-        tensor_proto = tf.make_tensor_proto(
-            values=values,
-            dtype=tf.as_dtype(values.dtype).as_datatype_enum,
-            shape=np.expand_dims(values, axis=0).shape)
-        predict_log.response.outputs[output_alias].CopyFrom(tensor_proto)
-      result.append(prediction_log)
-    return result
+    def __init__(
+        self,
+        inference_spec_type: model_spec_pb2.InferenceSpecType,
+        pipeline_options_project_id: Optional[str],
+    ):
+        super().__init__(inference_spec_type)
+        self._ai_platform_prediction_model_spec = (
+            inference_spec_type.ai_platform_prediction_model_spec
+        )
+        self._api_client = None
+        project_id = (
+            inference_spec_type.ai_platform_prediction_model_spec.project_id
+            or pipeline_options_project_id
+        )
+        if not project_id:
+            raise ValueError(
+                "Either a non-empty project id or project flag in "
+                " beam pipeline options needs be provided."
+            )
+
+        model_name = inference_spec_type.ai_platform_prediction_model_spec.model_name
+        if not model_name:
+            raise ValueError("A non-empty model name must be provided.")
+
+        version_name = (
+            inference_spec_type.ai_platform_prediction_model_spec.version_name
+        )
+        name_spec = "projects/{}/models/{}"
+        # If version is not specified, the default version for a model is used.
+        if version_name:
+            name_spec += "/versions/{}"
+        self._full_model_name = name_spec.format(project_id, model_name, version_name)
+
+    # Retry _REMOTE_INFERENCE_NUM_RETRIES times with exponential backoff.
+    @retry.with_exponential_backoff(
+        initial_delay_secs=1.0,
+        num_retries=_REMOTE_INFERENCE_NUM_RETRIES,
+        retry_filter=_retry_on_unavailable_and_resource_error_filter,
+    )
+    def _execute_request(
+        self, request: http.HttpRequest
+    ) -> Mapping[str, Sequence[Mapping[str, Any]]]:
+        result = request.execute()
+        if "error" in result:
+            raise ValueError(result["error"])
+        return result
+
+    def _make_instances(
+        self,
+        examples: List[Union[tf.train.Example, tf.train.SequenceExample]],
+        serialized_examples: List[bytes],
+    ) -> List[Mapping[str, Any]]:
+        if self._ai_platform_prediction_model_spec.use_serialization_config:
+            return [
+                {"b64": base64.b64encode(se).decode()} for se in serialized_examples
+            ]
+        else:
+            result = []
+            for example in examples:
+                instance = {}
+                for name, feature in example.features.feature.items():
+                    attribute_kind = feature.WhichOneof("kind")
+                    if attribute_kind is None:
+                        continue
+                    values = self._make_values(name, feature, attribute_kind)
+                    instance[name] = values[0] if len(values) == 1 else values
+                result.append(instance)
+            return result
+
+    @staticmethod
+    def _make_values(name: str, feature: Any, attribute_kind: str) -> List[Any]:
+        values = getattr(feature, attribute_kind).value
+        if name.endswith("_bytes"):
+            return [{"b64": base64.b64encode(x).decode()} for x in values]
+        elif attribute_kind == "bytes_list":
+            return [x.decode() for x in values]
+        else:
+            # Converts proto RepeatedScalarContainer to list so it is
+            # JSON-serializable.
+            return list(values)
+
+    def load_model(self):
+        # TODO(b/151468119): Add tfx_bsl_version and tfx_bsl_py_version to
+        # user agent once custom header is supported in googleapiclient.
+        self._api_client = discovery.build("ml", "v1")
+        # load_model returns a locally hosted model. Since all these inferences
+        # are run on vertexAI, no local model is present.
+        return
+
+    def _check_examples(self, examples: List[InputType]):
+        # TODO(b/131873699): Add support for tf.train.SequenceExample even when
+        # use_serialization_config is not enabled (by appropriately modifying
+        # _make_instances).
+        allowed_types = (
+            (tf.train.Example, tf.train.SequenceExample, bytes)
+            if self._ai_platform_prediction_model_spec.use_serialization_config
+            else tf.train.Example
+        )
+        if not all(isinstance(e, allowed_types) for e in examples):
+            raise NotImplementedError(
+                "RemotePredict supports raw and serialized tf.train.Example, raw and "
+                "serialized tf.SequenceExample and raw bytes (the "
+                "latter three only when use_serialization_config is true)"
+            )
+
+    def _run_inference(
+        self, examples: List[InputType], serialized_examples: List[bytes], model
+    ) -> List[Mapping[str, Any]]:
+        self._check_examples(examples)
+        body = {"instances": self._make_instances(examples, serialized_examples)}
+        if self._api_client is None:
+            raise ValueError("API client is not initialized. Call load_model() first.")
+        request = self._api_client.projects().predict(
+            name=self._full_model_name, body=body
+        )
+        response = self._execute_request(request)
+        return response["predictions"]
+
+    def _post_process(
+        self,
+        examples: List[InputType],
+        serialized_examples: List[bytes],
+        outputs: List[Mapping[str, Any]],
+    ) -> List[prediction_log_pb2.PredictionLog]:
+        del examples
+        result = []
+        for i, serialized_example in enumerate(serialized_examples):
+            prediction_log = prediction_log_pb2.PredictionLog()
+            predict_log = prediction_log.predict_log
+            input_tensor_proto = predict_log.request.inputs[
+                tf.saved_model.PREDICT_INPUTS
+            ]
+            input_tensor_proto.dtype = tf.string.as_datatype_enum
+            input_tensor_proto.tensor_shape.dim.add().size = 1
+            input_tensor_proto.string_val.append(serialized_example)
+            for output_alias, values in outputs[i].items():
+                values = np.array(values)
+                tensor_proto = tf.make_tensor_proto(
+                    values=values,
+                    dtype=tf.as_dtype(values.dtype).as_datatype_enum,
+                    shape=np.expand_dims(values, axis=0).shape,
+                )
+                predict_log.response.outputs[output_alias].CopyFrom(tensor_proto)
+            result.append(prediction_log)
+        return result
 
 
 class _BaseSavedModelHandler(_BaseModelHandler):
-  """A spec that runs in-process batch inference with a model.
+    """A spec that runs in-process batch inference with a model.
 
     Models need to have the required serving signature as mentioned in
     [Tensorflow Serving](https://www.tensorflow.org/tfx/serving/signature_defs)
 
     This function will check model signatures first. Then it will load and run
     model inference in batch.
-  """
-
-  def __init__(self, inference_spec_type: model_spec_pb2.InferenceSpecType,
-               load_override_fn: Optional[LoadOverrideFnType]):
-    super().__init__(inference_spec_type)
-    self._inference_spec_type = inference_spec_type
-    self._model_path = inference_spec_type.saved_model_spec.model_path
-    if not self._model_path:
-      raise ValueError('Model path is not valid.')
-    self._tags = _get_tags(inference_spec_type)
-    self._signatures = _get_signatures(
-        inference_spec_type.saved_model_spec.model_path,
-        inference_spec_type.saved_model_spec.signature_name, self._tags)
-    self._io_tensor_spec = self._make_io_tensor_spec()
-    if self._has_tpu_tag():
-      # TODO(b/161563144): Support TPU inference.
-      raise NotImplementedError('TPU inference is not supported yet.')
-    self._load_override_fn = load_override_fn
-
-  def _has_tpu_tag(self) -> bool:
-    return (len(self._tags) == 2 and tf.saved_model.SERVING in self._tags and
-            tf.saved_model.TPU in self._tags)
-
-  # TODO(b/159982957): Replace this with a mechinism that registers any custom
-  # op.
-  def _maybe_register_addon_ops(self):
-
-    def _try_import(name):
-      try:
-        importlib.import_module(name)
-      except (ImportError, tf.errors.NotFoundError):
-        logging.info('%s is not available.', name)
-
-    _try_import('tensorflow_text')
-    _try_import('tensorflow_decision_forests')
-    _try_import('struct2tensor')
-
-  def load_model(self):
-    if self._load_override_fn:
-      return self._load_override_fn(self._model_path, self._tags)
-    self._maybe_register_addon_ops()
-    result = tf.compat.v1.Session(graph=tf.compat.v1.Graph())
-    tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path)
-    return result
+    """
 
-  def _make_io_tensor_spec(self) -> _IOTensorSpec:
-    # Pre process functions will validate for each signature.
-    io_tensor_specs = []
-    for signature in self._signatures:
-      if len(signature.signature_def.inputs) != 1:
-        raise ValueError('Signature should have 1 and only 1 inputs')
-      if (list(signature.signature_def.inputs.values())[0].dtype !=
-          tf.string.as_datatype_enum):
-        raise ValueError(
-            'Input dtype is expected to be %s, got %s' %
-            (tf.string.as_datatype_enum,
-             list(signature.signature_def.inputs.values())[0].dtype))
-      io_tensor_specs.append(_signature_pre_process(signature.signature_def))
-    input_tensor_name = ''
-    input_tensor_alias = ''
-    output_alias_tensor_names = {}
-    for io_tensor_spec in io_tensor_specs:
-      if not input_tensor_name:
-        input_tensor_name = io_tensor_spec.input_tensor_name
-        input_tensor_alias = io_tensor_spec.input_tensor_alias
-      elif input_tensor_name != io_tensor_spec.input_tensor_name:
-        raise ValueError('Input tensor must be the same for all Signatures.')
-      for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items(
-      ):
-        output_alias_tensor_names[alias] = tensor_name
-    if (not output_alias_tensor_names or not input_tensor_name or
-        not input_tensor_alias):
-      raise ValueError('No valid fetch tensors or feed tensors.')
-    return _IOTensorSpec(input_tensor_alias, input_tensor_name,
-                         output_alias_tensor_names)
-
-  def _run_inference(self, examples: List[InputType],  # pytype: disable=signature-mismatch  # overriding-return-type-checks
-                     serialized_examples: List[bytes],
-                     model: Any) -> Mapping[Text, np.ndarray]:
-    result = model.run(
-        self._io_tensor_spec.output_alias_tensor_names,
-        feed_dict={self._io_tensor_spec.input_tensor_name: serialized_examples})
-    if len(result) != len(self._io_tensor_spec.output_alias_tensor_names):
-      raise RuntimeError('Output length does not match fetches')
-    return result
+    def __init__(
+        self,
+        inference_spec_type: model_spec_pb2.InferenceSpecType,
+        load_override_fn: Optional[LoadOverrideFnType],
+    ):
+        super().__init__(inference_spec_type)
+        self._inference_spec_type = inference_spec_type
+        self._model_path = inference_spec_type.saved_model_spec.model_path
+        if not self._model_path:
+            raise ValueError("Model path is not valid.")
+        self._tags = _get_tags(inference_spec_type)
+        self._signatures = _get_signatures(
+            inference_spec_type.saved_model_spec.model_path,
+            inference_spec_type.saved_model_spec.signature_name,
+            self._tags,
+        )
+        self._io_tensor_spec = self._make_io_tensor_spec()
+        if self._has_tpu_tag():
+            # TODO(b/161563144): Support TPU inference.
+            raise NotImplementedError("TPU inference is not supported yet.")
+        self._load_override_fn = load_override_fn
+
+    def _has_tpu_tag(self) -> bool:
+        return (
+            len(self._tags) == 2
+            and tf.saved_model.SERVING in self._tags
+            and tf.saved_model.TPU in self._tags
+        )
+
+    # TODO(b/159982957): Replace this with a mechinism that registers any custom
+    # op.
+    def _maybe_register_addon_ops(self):
+        def _try_import(name):
+            try:
+                importlib.import_module(name)
+            except (ImportError, tf.errors.NotFoundError):
+                logging.info("%s is not available.", name)
+
+        _try_import("tensorflow_text")
+        _try_import("tensorflow_decision_forests")
+        _try_import("struct2tensor")
+
+    def load_model(self):
+        if self._load_override_fn:
+            return self._load_override_fn(self._model_path, self._tags)
+        self._maybe_register_addon_ops()
+        result = tf.compat.v1.Session(graph=tf.compat.v1.Graph())
+        tf.compat.v1.saved_model.loader.load(result, self._tags, self._model_path)
+        return result
+
+    def _make_io_tensor_spec(self) -> _IOTensorSpec:
+        # Pre process functions will validate for each signature.
+        io_tensor_specs = []
+        for signature in self._signatures:
+            if len(signature.signature_def.inputs) != 1:
+                raise ValueError("Signature should have 1 and only 1 inputs")
+            if (
+                list(signature.signature_def.inputs.values())[0].dtype
+                != tf.string.as_datatype_enum
+            ):
+                raise ValueError(
+                    "Input dtype is expected to be %s, got %s"
+                    % (
+                        tf.string.as_datatype_enum,
+                        list(signature.signature_def.inputs.values())[0].dtype,
+                    )
+                )
+            io_tensor_specs.append(_signature_pre_process(signature.signature_def))
+        input_tensor_name = ""
+        input_tensor_alias = ""
+        output_alias_tensor_names = {}
+        for io_tensor_spec in io_tensor_specs:
+            if not input_tensor_name:
+                input_tensor_name = io_tensor_spec.input_tensor_name
+                input_tensor_alias = io_tensor_spec.input_tensor_alias
+            elif input_tensor_name != io_tensor_spec.input_tensor_name:
+                raise ValueError("Input tensor must be the same for all Signatures.")
+            for alias, tensor_name in io_tensor_spec.output_alias_tensor_names.items():
+                output_alias_tensor_names[alias] = tensor_name
+        if (
+            not output_alias_tensor_names
+            or not input_tensor_name
+            or not input_tensor_alias
+        ):
+            raise ValueError("No valid fetch tensors or feed tensors.")
+        return _IOTensorSpec(
+            input_tensor_alias, input_tensor_name, output_alias_tensor_names
+        )
+
+    def _run_inference(
+        self,
+        examples: List[
+            InputType
+        ],  # pytype: disable=signature-mismatch  # overriding-return-type-checks
+        serialized_examples: List[bytes],
+        model: Any,
+    ) -> Mapping[str, np.ndarray]:
+        result = model.run(
+            self._io_tensor_spec.output_alias_tensor_names,
+            feed_dict={self._io_tensor_spec.input_tensor_name: serialized_examples},
+        )
+        if len(result) != len(self._io_tensor_spec.output_alias_tensor_names):
+            raise RuntimeError("Output length does not match fetches")
+        return result
 
 
 class _ClassifyModelHandler(_BaseSavedModelHandler):
-  """Implements a spec for classification."""
-
-  def _check_examples(self, examples: List[InputType]):
-    if not all(isinstance(e, (tf.train.Example, bytes)) for e in examples):
-      raise ValueError(
-          'Classify only supports raw or serialized tf.train.Example')
-
-  def _post_process(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
-      self, examples: List[Union[tf.train.Example,
-                                 bytes]], serialized_examples: List[bytes],
-      outputs: Mapping[Text,
-                       np.ndarray]) -> List[prediction_log_pb2.PredictionLog]:
-    del serialized_examples
-    # TODO(b/131873699): Can we fold prediction_log_pb2.PredictionLog building
-    # into _post_process_classify?
-    classifications = _post_process_classify(
-        self._io_tensor_spec.output_alias_tensor_names, examples, outputs)
-    result = []
-    for example, classification in zip(examples, classifications):
-      prediction_log = prediction_log_pb2.PredictionLog()
-      input_example = (prediction_log.classify_log.request.input.example_list
-                       .examples.add())
-      (input_example.ParseFromString
-       if isinstance(example, bytes)
-       else input_example.CopyFrom)(example)
-      (prediction_log.classify_log.response.result.classifications.add()
-       .CopyFrom(classification))
-      result.append(prediction_log)
-    return result
+    """Implements a spec for classification."""
+
+    def _check_examples(self, examples: List[InputType]):
+        if not all(isinstance(e, (tf.train.Example, bytes)) for e in examples):
+            raise ValueError(
+                "Classify only supports raw or serialized tf.train.Example"
+            )
+
+    def _post_process(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
+        self,
+        examples: List[Union[tf.train.Example, bytes]],
+        serialized_examples: List[bytes],
+        outputs: Mapping[str, np.ndarray],
+    ) -> List[prediction_log_pb2.PredictionLog]:
+        del serialized_examples
+        # TODO(b/131873699): Can we fold prediction_log_pb2.PredictionLog building
+        # into _post_process_classify?
+        classifications = _post_process_classify(
+            self._io_tensor_spec.output_alias_tensor_names, examples, outputs
+        )
+        result = []
+        for example, classification in zip(examples, classifications):
+            prediction_log = prediction_log_pb2.PredictionLog()
+            input_example = (
+                prediction_log.classify_log.request.input.example_list.examples.add()
+            )
+            (
+                input_example.ParseFromString
+                if isinstance(example, bytes)
+                else input_example.CopyFrom
+            )(example)
+            (
+                prediction_log.classify_log.response.result.classifications.add().CopyFrom(
+                    classification
+                )
+            )
+            result.append(prediction_log)
+        return result
 
 
 class _RegressModelHandler(_BaseSavedModelHandler):
-  """A DoFn that run inference on regression model."""
-
-  def _check_examples(self, examples: List[InputType]):
-    if not all(isinstance(e, (tf.train.Example, bytes)) for e in examples):
-      raise ValueError(
-          'Regress only supports raw or serialized tf.train.Example')
-
-  def _post_process(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
-      self, examples: List[Union[tf.train.Example,
-                                 bytes]], serialized_examples: List[bytes],
-      outputs: Mapping[Text,
-                       np.ndarray]) -> List[prediction_log_pb2.PredictionLog]:
-    del serialized_examples
-    # TODO(b/131873699): Can we fold prediction_log_pb2.PredictionLog building
-    # into _post_process_regress?
-    regressions = _post_process_regress(examples, outputs)
-    result = []
-    for example, regression in zip(examples, regressions):
-      prediction_log = prediction_log_pb2.PredictionLog()
-      input_example = (prediction_log.regress_log.request.input.example_list
-                       .examples.add())
-      (input_example.ParseFromString
-       if isinstance(example, bytes)
-       else input_example.CopyFrom)(example)
-      prediction_log.regress_log.response.result.regressions.add().CopyFrom(
-          regression)
-      result.append(prediction_log)
-    return result
+    """A DoFn that run inference on regression model."""
+
+    def _check_examples(self, examples: List[InputType]):
+        if not all(isinstance(e, (tf.train.Example, bytes)) for e in examples):
+            raise ValueError("Regress only supports raw or serialized tf.train.Example")
+
+    def _post_process(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
+        self,
+        examples: List[Union[tf.train.Example, bytes]],
+        serialized_examples: List[bytes],
+        outputs: Mapping[str, np.ndarray],
+    ) -> List[prediction_log_pb2.PredictionLog]:
+        del serialized_examples
+        # TODO(b/131873699): Can we fold prediction_log_pb2.PredictionLog building
+        # into _post_process_regress?
+        regressions = _post_process_regress(examples, outputs)
+        result = []
+        for example, regression in zip(examples, regressions):
+            prediction_log = prediction_log_pb2.PredictionLog()
+            input_example = (
+                prediction_log.regress_log.request.input.example_list.examples.add()
+            )
+            (
+                input_example.ParseFromString
+                if isinstance(example, bytes)
+                else input_example.CopyFrom
+            )(example)
+            prediction_log.regress_log.response.result.regressions.add().CopyFrom(
+                regression
+            )
+            result.append(prediction_log)
+        return result
 
 
 class _MultiInferenceModelHandler(_BaseSavedModelHandler):
-  """A DoFn that runs inference on multi-head model."""
-
-  def _check_examples(self, examples: List[InputType]):
-    if not all(isinstance(e, (tf.train.Example, bytes)) for e in examples):
-      raise ValueError(
-          'Multi inference only supports raw or serialized tf.train.Example')
-
-  def _post_process(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
-      self, examples: List[Union[tf.train.Example,
-                                 bytes]], serialized_examples: List[bytes],
-      outputs: Mapping[Text,
-                       np.ndarray]) -> List[prediction_log_pb2.PredictionLog]:
-    del serialized_examples
-    classifications = None
-    regressions = None
-    for signature in self._signatures:
-      signature_def = signature.signature_def
-      if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME:
-        classifications = _post_process_classify(
-            self._io_tensor_spec.output_alias_tensor_names, examples, outputs)
-      elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME:
-        regressions = _post_process_regress(examples, outputs)
-      else:
-        raise ValueError('Signature method %s is not supported for '
-                         'multi inference' % signature_def.method_name)
-    result = []
-    for i, example in enumerate(examples):
-      prediction_log = prediction_log_pb2.PredictionLog()
-      input_example = (prediction_log.multi_inference_log.request.input
-                       .example_list.examples.add())
-      (input_example.ParseFromString
-       if isinstance(example, bytes)
-       else input_example.CopyFrom)(example)
-      response = prediction_log.multi_inference_log.response
-      for signature in self._signatures:
-        signature_def = signature.signature_def
-        inference_result = response.results.add()
-        if (signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME and
-            classifications):
-          inference_result.classification_result.classifications.add().CopyFrom(
-              classifications[i])
-        elif (
-            signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME and
-            regressions):
-          inference_result.regression_result.regressions.add().CopyFrom(
-              regressions[i])
-        else:
-          raise ValueError('Signature method %s is not supported for '
-                           'multi inference' % signature_def.method_name)
-        inference_result.model_spec.signature_name = signature.name
-      if len(response.results) != len(self._signatures):
-        raise RuntimeError('Multi inference response result length does not '
-                           'match the number of signatures')
-      result.append(prediction_log)
-    return result
+    """A DoFn that runs inference on multi-head model."""
+
+    def _check_examples(self, examples: List[InputType]):
+        if not all(isinstance(e, (tf.train.Example, bytes)) for e in examples):
+            raise ValueError(
+                "Multi inference only supports raw or serialized tf.train.Example"
+            )
+
+    def _post_process(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
+        self,
+        examples: List[Union[tf.train.Example, bytes]],
+        serialized_examples: List[bytes],
+        outputs: Mapping[str, np.ndarray],
+    ) -> List[prediction_log_pb2.PredictionLog]:
+        del serialized_examples
+        classifications = None
+        regressions = None
+        for signature in self._signatures:
+            signature_def = signature.signature_def
+            if signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME:
+                classifications = _post_process_classify(
+                    self._io_tensor_spec.output_alias_tensor_names, examples, outputs
+                )
+            elif signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME:
+                regressions = _post_process_regress(examples, outputs)
+            else:
+                raise ValueError(
+                    "Signature method %s is not supported for "
+                    "multi inference" % signature_def.method_name
+                )
+        result = []
+        for i, example in enumerate(examples):
+            prediction_log = prediction_log_pb2.PredictionLog()
+            input_example = prediction_log.multi_inference_log.request.input.example_list.examples.add()
+            (
+                input_example.ParseFromString
+                if isinstance(example, bytes)
+                else input_example.CopyFrom
+            )(example)
+            response = prediction_log.multi_inference_log.response
+            for signature in self._signatures:
+                signature_def = signature.signature_def
+                inference_result = response.results.add()
+                if (
+                    signature_def.method_name == tf.saved_model.CLASSIFY_METHOD_NAME
+                    and classifications
+                ):
+                    inference_result.classification_result.classifications.add().CopyFrom(
+                        classifications[i]
+                    )
+                elif (
+                    signature_def.method_name == tf.saved_model.REGRESS_METHOD_NAME
+                    and regressions
+                ):
+                    inference_result.regression_result.regressions.add().CopyFrom(
+                        regressions[i]
+                    )
+                else:
+                    raise ValueError(
+                        "Signature method %s is not supported for "
+                        "multi inference" % signature_def.method_name
+                    )
+                inference_result.model_spec.signature_name = signature.name
+            if len(response.results) != len(self._signatures):
+                raise RuntimeError(
+                    "Multi inference response result length does not "
+                    "match the number of signatures"
+                )
+            result.append(prediction_log)
+        return result
 
 
 class _PredictModelHandler(_BaseSavedModelHandler):
-  """A DoFn that runs inference on predict model."""
-
-  def _check_examples(self, examples: List[InputType]):
-    pass
-
-  def _post_process(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
-      self, examples: List[InputType], serialized_examples: List[bytes],
-      outputs: Mapping[Text,
-                       np.ndarray]) -> List[prediction_log_pb2.PredictionLog]:
-    del examples
-    input_tensor_alias = self._io_tensor_spec.input_tensor_alias
-    signature_name = self._signatures[0].name
-    batch_size = len(serialized_examples)
-    for output_alias, output in outputs.items():
-      if len(output.shape) < 1 or output.shape[0] != batch_size:
-        raise ValueError(
-            'Expected output tensor %s to have at least one '
-            'dimension, with the first having a size equal to the input batch '
-            'size %s. Instead found %s' %
-            (output_alias, batch_size, output.shape))
-    result = []
-    for i, serialized_example in enumerate(serialized_examples):
-      prediction_log = prediction_log_pb2.PredictionLog()
-      predict_log = prediction_log.predict_log
-      input_tensor_proto = predict_log.request.inputs[input_tensor_alias]
-      input_tensor_proto.dtype = tf.string.as_datatype_enum
-      input_tensor_proto.tensor_shape.dim.add().size = 1
-      input_tensor_proto.string_val.append(serialized_example)
-      predict_log.request.model_spec.signature_name = signature_name
-      predict_log.response.model_spec.signature_name = signature_name
-      for output_alias, output in outputs.items():
-        # Mimic tensor::Split
-        values = output[i]
-        tensor_proto = tf.make_tensor_proto(
-            values=values,
-            dtype=tf.as_dtype(values.dtype).as_datatype_enum,
-            shape=np.expand_dims(values, axis=0).shape)
-        predict_log.response.outputs[output_alias].CopyFrom(tensor_proto)
-      result.append(prediction_log)
-    return result
+    """A DoFn that runs inference on predict model."""
+
+    def _check_examples(self, examples: List[InputType]):
+        pass
+
+    def _post_process(  # pytype: disable=signature-mismatch  # overriding-parameter-type-checks
+        self,
+        examples: List[InputType],
+        serialized_examples: List[bytes],
+        outputs: Mapping[str, np.ndarray],
+    ) -> List[prediction_log_pb2.PredictionLog]:
+        del examples
+        input_tensor_alias = self._io_tensor_spec.input_tensor_alias
+        signature_name = self._signatures[0].name
+        batch_size = len(serialized_examples)
+        for output_alias, output in outputs.items():
+            if len(output.shape) < 1 or output.shape[0] != batch_size:
+                raise ValueError(
+                    "Expected output tensor %s to have at least one "
+                    "dimension, with the first having a size equal to the input batch "
+                    "size %s. Instead found %s"
+                    % (output_alias, batch_size, output.shape)
+                )
+        result = []
+        for i, serialized_example in enumerate(serialized_examples):
+            prediction_log = prediction_log_pb2.PredictionLog()
+            predict_log = prediction_log.predict_log
+            input_tensor_proto = predict_log.request.inputs[input_tensor_alias]
+            input_tensor_proto.dtype = tf.string.as_datatype_enum
+            input_tensor_proto.tensor_shape.dim.add().size = 1
+            input_tensor_proto.string_val.append(serialized_example)
+            predict_log.request.model_spec.signature_name = signature_name
+            predict_log.response.model_spec.signature_name = signature_name
+            for output_alias, output in outputs.items():
+                # Mimic tensor::Split
+                values = output[i]
+                tensor_proto = tf.make_tensor_proto(
+                    values=values,
+                    dtype=tf.as_dtype(values.dtype).as_datatype_enum,
+                    shape=np.expand_dims(values, axis=0).shape,
+                )
+                predict_log.response.outputs[output_alias].CopyFrom(tensor_proto)
+            result.append(prediction_log)
+        return result
 
 
 def _post_process_classify(
-    output_alias_tensor_names: Mapping[Text, Text],
-    examples: List[tf.train.Example], outputs: Mapping[Text, np.ndarray]
+    output_alias_tensor_names: Mapping[str, str],
+    examples: List[tf.train.Example],
+    outputs: Mapping[str, np.ndarray],
 ) -> List[classification_pb2.Classifications]:
-  """Returns classifications from inference output."""
-
-  # This is to avoid error "The truth value of an array with
-  # more than one example is ambiguous."
-  has_classes = False
-  has_scores = False
-  if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names:
-    classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES]
-    has_classes = True
-  if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names:
-    scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES]
-    has_scores = True
-  if has_classes:
-    if classes.ndim != 2:
-      raise ValueError('Expected Tensor shape: [batch_size num_classes] but '
-                       'got %s' % classes.shape)
-    if classes.dtype != tf.string.as_numpy_dtype:
-      raise ValueError('Expected classes Tensor of %s. Got: %s' %
-                       (tf.string.as_numpy_dtype, classes.dtype))
-    if classes.shape[0] != len(examples):
-      raise ValueError('Expected classes output batch size of %s, got %s' %
-                       (len(examples), classes.shape[0]))
-  if has_scores:
-    if scores.ndim != 2:
-      raise ValueError("""Expected Tensor shape: [batch_size num_classes] but
-        got %s""" % scores.shape)
-    if scores.dtype != tf.float32.as_numpy_dtype:
-      raise ValueError('Expected classes Tensor of %s. Got: %s' %
-                       (tf.float32.as_numpy_dtype, scores.dtype))
-    if scores.shape[0] != len(examples):
-      raise ValueError('Expected classes output batch size of %s, got %s' %
-                       (len(examples), scores.shape[0]))
-  num_classes = 0
-  if has_classes and has_scores:
-    if scores.shape[1] != classes.shape[1]:
-      raise ValueError('Tensors class and score should match in shape[1]. '
-                       'Got %s vs %s' % (classes.shape[1], scores.shape[1]))
-    num_classes = classes.shape[1]
-  elif has_classes:
-    num_classes = classes.shape[1]
-  elif has_scores:
-    num_classes = scores.shape[1]
-
-  result = []
-  for i in range(len(examples)):
-    classifications = classification_pb2.Classifications()
-    for c in range(num_classes):
-      klass = classifications.classes.add()
-      if has_classes:
-        klass.label = classes[i][c]
-      if has_scores:
-        klass.score = scores[i][c]
-    result.append(classifications)
-  return result
+    """Returns classifications from inference output."""
+    # This is to avoid error "The truth value of an array with
+    # more than one example is ambiguous."
+    has_classes = False
+    has_scores = False
+    if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in output_alias_tensor_names:
+        classes = outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES]
+        has_classes = True
+    if tf.saved_model.CLASSIFY_OUTPUT_SCORES in output_alias_tensor_names:
+        scores = outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES]
+        has_scores = True
+    if has_classes:
+        if classes.ndim != 2:
+            raise ValueError(
+                "Expected Tensor shape: [batch_size num_classes] but "
+                "got %s" % classes.shape
+            )
+        if classes.dtype != tf.string.as_numpy_dtype:
+            raise ValueError(
+                "Expected classes Tensor of %s. Got: %s"
+                % (tf.string.as_numpy_dtype, classes.dtype)
+            )
+        if classes.shape[0] != len(examples):
+            raise ValueError(
+                "Expected classes output batch size of %s, got %s"
+                % (len(examples), classes.shape[0])
+            )
+    if has_scores:
+        if scores.ndim != 2:
+            raise ValueError(
+                """Expected Tensor shape: [batch_size num_classes] but
+        got %s"""
+                % scores.shape
+            )
+        if scores.dtype != tf.float32.as_numpy_dtype:
+            raise ValueError(
+                "Expected classes Tensor of %s. Got: %s"
+                % (tf.float32.as_numpy_dtype, scores.dtype)
+            )
+        if scores.shape[0] != len(examples):
+            raise ValueError(
+                "Expected classes output batch size of %s, got %s"
+                % (len(examples), scores.shape[0])
+            )
+    num_classes = 0
+    if has_classes and has_scores:
+        if scores.shape[1] != classes.shape[1]:
+            raise ValueError(
+                "Tensors class and score should match in shape[1]. "
+                "Got %s vs %s" % (classes.shape[1], scores.shape[1])
+            )
+        num_classes = classes.shape[1]
+    elif has_classes:
+        num_classes = classes.shape[1]
+    elif has_scores:
+        num_classes = scores.shape[1]
+
+    result = []
+    for i in range(len(examples)):
+        classifications = classification_pb2.Classifications()
+        for c in range(num_classes):
+            klass = classifications.classes.add()
+            if has_classes:
+                klass.label = classes[i][c]
+            if has_scores:
+                klass.score = scores[i][c]
+        result.append(classifications)
+    return result
 
 
 def _post_process_regress(
-    examples: List[tf.train.Example],
-    outputs: Mapping[Text, np.ndarray]) -> List[regression_pb2.Regression]:
-  """Returns regressions from inference output."""
-
-  if tf.saved_model.REGRESS_OUTPUTS not in outputs:
-    raise ValueError('No regression outputs found in outputs: %s' %
-                     outputs.keys())
-  output = outputs[tf.saved_model.REGRESS_OUTPUTS]
-  batch_size = len(examples)
-  if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)):
-    raise ValueError("""Expected output Tensor shape to be either [batch_size]
-                     or [batch_size, 1] but got %s""" % output.shape)
-  if batch_size != output.shape[0]:
-    raise ValueError(
-        'Input batch size did not match output batch size: %s vs %s' %
-        (batch_size, output.shape[0]))
-  if output.dtype != tf.float32.as_numpy_dtype:
-    raise ValueError('Expected output Tensor of %s. Got: %s' %
-                     (tf.float32.as_numpy_dtype, output.dtype))
-  if output.size != batch_size:
-    raise ValueError('Expected output batch size to be %s. Got: %s' %
-                     (batch_size, output.size))
-  flatten_output = output.flatten()
-  result = []
-  for value in flatten_output:
-    regression = regression_pb2.Regression()
-    regression.value = value
-    result.append(regression)
-  # Add additional check to save downstream consumer checks.
-  if len(result) != len(examples):
-    raise RuntimeError('Regression length does not match examples')
-  return result
+    examples: List[tf.train.Example], outputs: Mapping[str, np.ndarray]
+) -> List[regression_pb2.Regression]:
+    """Returns regressions from inference output."""
+    if tf.saved_model.REGRESS_OUTPUTS not in outputs:
+        raise ValueError("No regression outputs found in outputs: %s" % outputs.keys())
+    output = outputs[tf.saved_model.REGRESS_OUTPUTS]
+    batch_size = len(examples)
+    if not (output.ndim == 1 or (output.ndim == 2 and output.shape[1] == 1)):
+        raise ValueError(
+            """Expected output Tensor shape to be either [batch_size]
+                     or [batch_size, 1] but got %s"""
+            % output.shape
+        )
+    if batch_size != output.shape[0]:
+        raise ValueError(
+            "Input batch size did not match output batch size: %s vs %s"
+            % (batch_size, output.shape[0])
+        )
+    if output.dtype != tf.float32.as_numpy_dtype:
+        raise ValueError(
+            "Expected output Tensor of %s. Got: %s"
+            % (tf.float32.as_numpy_dtype, output.dtype)
+        )
+    if output.size != batch_size:
+        raise ValueError(
+            "Expected output batch size to be %s. Got: %s" % (batch_size, output.size)
+        )
+    flatten_output = output.flatten()
+    result = []
+    for value in flatten_output:
+        regression = regression_pb2.Regression()
+        regression.value = value
+        result.append(regression)
+    # Add additional check to save downstream consumer checks.
+    if len(result) != len(examples):
+        raise RuntimeError("Regression length does not match examples")
+    return result
 
 
 def _signature_pre_process(signature: _SignatureDef) -> _IOTensorSpec:
-  """Returns IOTensorSpec from signature."""
-
-  if len(signature.inputs) != 1:
-    raise ValueError('Signature should have 1 and only 1 inputs')
-  input_tensor_alias = list(signature.inputs.keys())[0]
-  if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum:
-    raise ValueError(
-        'Input dtype is expected to be %s, got %s' % tf.string.as_datatype_enum,
-        list(signature.inputs.values())[0].dtype)
-  if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME:
-    input_tensor_name, output_alias_tensor_names = (
-        _signature_pre_process_classify(signature))
-  elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME:
-    input_tensor_name, output_alias_tensor_names = (
-        _signature_pre_process_regress(signature))
-  elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME:
-    input_tensor_name, output_alias_tensor_names = (
-        _signature_pre_process_predict(signature))
-  else:
-    raise ValueError('Signature method %s is not supported' %
-                     signature.method_name)
-  return _IOTensorSpec(input_tensor_alias, input_tensor_name,
-                       output_alias_tensor_names)
+    """Returns IOTensorSpec from signature."""
+    if len(signature.inputs) != 1:
+        raise ValueError("Signature should have 1 and only 1 inputs")
+    input_tensor_alias = list(signature.inputs.keys())[0]
+    if list(signature.inputs.values())[0].dtype != tf.string.as_datatype_enum:
+        raise ValueError(
+            "Input dtype is expected to be %s, got %s" % tf.string.as_datatype_enum,
+            list(signature.inputs.values())[0].dtype,
+        )
+    if signature.method_name == tf.saved_model.CLASSIFY_METHOD_NAME:
+        input_tensor_name, output_alias_tensor_names = _signature_pre_process_classify(
+            signature
+        )
+    elif signature.method_name == tf.saved_model.REGRESS_METHOD_NAME:
+        input_tensor_name, output_alias_tensor_names = _signature_pre_process_regress(
+            signature
+        )
+    elif signature.method_name == tf.saved_model.PREDICT_METHOD_NAME:
+        input_tensor_name, output_alias_tensor_names = _signature_pre_process_predict(
+            signature
+        )
+    else:
+        raise ValueError("Signature method %s is not supported" % signature.method_name)
+    return _IOTensorSpec(
+        input_tensor_alias, input_tensor_name, output_alias_tensor_names
+    )
 
 
 def _signature_pre_process_classify(
-    signature: _SignatureDef) -> Tuple[Text, Dict[Text, Text]]:
-  """Returns input tensor name and output alias tensor names from signature.
-
-  Args:
-    signature: SignatureDef
-
-  Returns:
-    A tuple of input tensor name and output alias tensor names.
-  """
-
-  if len(signature.outputs) != 1 and len(signature.outputs) != 2:
-    raise ValueError('Classify signature should have 1 or 2 outputs')
-  if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs:
-    raise ValueError('No classification inputs found in SignatureDef: %s' %
-                     signature.inputs)
-  input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name
-  output_alias_tensor_names = {}
-  if (tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs and
-      tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs):
-    raise ValueError(
-        """Expected classification signature outputs to contain at
-        least one of %s or %s. Signature was: %s""" %
-        tf.saved_model.CLASSIFY_OUTPUT_CLASSES,
-        tf.saved_model.CLASSIFY_OUTPUT_SCORES, signature)
-  if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs:
-    output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = (
-        signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name)
-  if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs:
-    output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = (
-        signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name)
-  return input_tensor_name, output_alias_tensor_names
+    signature: _SignatureDef,
+) -> Tuple[str, Dict[str, str]]:
+    """Returns input tensor name and output alias tensor names from signature.
+
+    Args:
+    ----
+      signature: SignatureDef
+
+    Returns:
+    -------
+      A tuple of input tensor name and output alias tensor names.
+    """
+    if len(signature.outputs) != 1 and len(signature.outputs) != 2:
+        raise ValueError("Classify signature should have 1 or 2 outputs")
+    if tf.saved_model.CLASSIFY_INPUTS not in signature.inputs:
+        raise ValueError(
+            "No classification inputs found in SignatureDef: %s" % signature.inputs
+        )
+    input_tensor_name = signature.inputs[tf.saved_model.CLASSIFY_INPUTS].name
+    output_alias_tensor_names = {}
+    if (
+        tf.saved_model.CLASSIFY_OUTPUT_CLASSES not in signature.outputs
+        and tf.saved_model.CLASSIFY_OUTPUT_SCORES not in signature.outputs
+    ):
+        raise ValueError(
+            """Expected classification signature outputs to contain at
+        least one of %s or %s. Signature was: %s"""
+            % tf.saved_model.CLASSIFY_OUTPUT_CLASSES,
+            tf.saved_model.CLASSIFY_OUTPUT_SCORES,
+            signature,
+        )
+    if tf.saved_model.CLASSIFY_OUTPUT_CLASSES in signature.outputs:
+        output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_CLASSES] = (
+            signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_CLASSES].name
+        )
+    if tf.saved_model.CLASSIFY_OUTPUT_SCORES in signature.outputs:
+        output_alias_tensor_names[tf.saved_model.CLASSIFY_OUTPUT_SCORES] = (
+            signature.outputs[tf.saved_model.CLASSIFY_OUTPUT_SCORES].name
+        )
+    return input_tensor_name, output_alias_tensor_names
 
 
 def _signature_pre_process_regress(
-    signature: _SignatureDef) -> Tuple[Text, Dict[Text, Text]]:
-  """Returns input tensor name and output alias tensor names from signature.
-
-  Args:
-    signature: SignatureDef
-
-  Returns:
-    A tuple of input tensor name and output alias tensor names.
-  """
-
-  if len(signature.outputs) != 1:
-    raise ValueError('Regress signature should have 1 output')
-  if tf.saved_model.REGRESS_INPUTS not in signature.inputs:
-    raise ValueError('No regression inputs found in SignatureDef: %s' %
-                     signature.inputs)
-  input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name
-  if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs:
-    raise ValueError('No regression outputs found in SignatureDef: %s' %
-                     signature.outputs)
-  output_alias_tensor_names = {
-      tf.saved_model.REGRESS_OUTPUTS:
-          signature.outputs[tf.saved_model.REGRESS_OUTPUTS].name
-  }
-  return input_tensor_name, output_alias_tensor_names
+    signature: _SignatureDef,
+) -> Tuple[str, Dict[str, str]]:
+    """Returns input tensor name and output alias tensor names from signature.
+
+    Args:
+    ----
+      signature: SignatureDef
+
+    Returns:
+    -------
+      A tuple of input tensor name and output alias tensor names.
+    """
+    if len(signature.outputs) != 1:
+        raise ValueError("Regress signature should have 1 output")
+    if tf.saved_model.REGRESS_INPUTS not in signature.inputs:
+        raise ValueError(
+            "No regression inputs found in SignatureDef: %s" % signature.inputs
+        )
+    input_tensor_name = signature.inputs[tf.saved_model.REGRESS_INPUTS].name
+    if tf.saved_model.REGRESS_OUTPUTS not in signature.outputs:
+        raise ValueError(
+            "No regression outputs found in SignatureDef: %s" % signature.outputs
+        )
+    output_alias_tensor_names = {
+        tf.saved_model.REGRESS_OUTPUTS: signature.outputs[
+            tf.saved_model.REGRESS_OUTPUTS
+        ].name
+    }
+    return input_tensor_name, output_alias_tensor_names
 
 
 def _signature_pre_process_predict(
-    signature: _SignatureDef) -> Tuple[Text, Dict[Text, Text]]:
-  """Returns input tensor name and output alias tensor names from signature.
-
-  Args:
-    signature: SignatureDef
-
-  Returns:
-    A tuple of input tensor name and output alias tensor names.
-  """
-
-  input_tensor_name = list(signature.inputs.values())[0].name
-  output_alias_tensor_names = dict([
-      (key, output.name) for key, output in signature.outputs.items()
-  ])
-  return input_tensor_name, output_alias_tensor_names
-
-
-def _get_signatures(model_path: Text, signatures: Sequence[Text],
-                    tags: Sequence[Text]) -> Sequence[_Signature]:
-  """Returns a sequence of {model_signature_name: signature}."""
-
-  if signatures:
-    signature_names = signatures
-  else:
-    signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
-
-  saved_model_pb = loader_impl.parse_saved_model(model_path)
-  meta_graph_def = _get_meta_graph_def(saved_model_pb, tags)
-  result = []
-  for signature_name in signature_names:
-    if signature_name in meta_graph_def.signature_def:
-      result.append(
-          _Signature(signature_name,
-                     meta_graph_def.signature_def[signature_name]))
-    else:
-      raise RuntimeError('Signature %s could not be found in SavedModel' %
-                         signature_name)
-  return result
-
-
-def _get_operation_type(
-    inference_spec_type: model_spec_pb2.InferenceSpecType) -> Text:
-  if _using_in_process_inference(inference_spec_type):
-    signatures = _get_signatures(
-        inference_spec_type.saved_model_spec.model_path,
-        inference_spec_type.saved_model_spec.signature_name,
-        _get_tags(inference_spec_type))
-    if not signatures:
-      raise ValueError('Model does not have valid signature to use')
-
-    if len(signatures) == 1:
-      method_name = signatures[0].signature_def.method_name
-      if method_name == tf.saved_model.CLASSIFY_METHOD_NAME:
-        return _OperationType.CLASSIFICATION
-      elif method_name == tf.saved_model.REGRESS_METHOD_NAME:
-        return _OperationType.REGRESSION
-      elif method_name == tf.saved_model.PREDICT_METHOD_NAME:
-        return _OperationType.PREDICTION
-      else:
-        raise ValueError('Unsupported signature method_name %s' % method_name)
+    signature: _SignatureDef,
+) -> Tuple[str, Dict[str, str]]:
+    """Returns input tensor name and output alias tensor names from signature.
+
+    Args:
+    ----
+      signature: SignatureDef
+
+    Returns:
+    -------
+      A tuple of input tensor name and output alias tensor names.
+    """
+    input_tensor_name = list(signature.inputs.values())[0].name
+    output_alias_tensor_names = dict(
+        [(key, output.name) for key, output in signature.outputs.items()]
+    )
+    return input_tensor_name, output_alias_tensor_names
+
+
+def _get_signatures(
+    model_path: str, signatures: Sequence[str], tags: Sequence[str]
+) -> Sequence[_Signature]:
+    """Returns a sequence of {model_signature_name: signature}."""
+    if signatures:
+        signature_names = signatures
     else:
-      for signature in signatures:
-        method_name = signature.signature_def.method_name
-        if (method_name != tf.saved_model.CLASSIFY_METHOD_NAME and
-            method_name != tf.saved_model.REGRESS_METHOD_NAME):
-          raise ValueError('Unsupported signature method_name for multi-head '
-                           'model inference: %s' % method_name)
-      return _OperationType.MULTI_INFERENCE
-  else:
-    # Remote inference supports predictions only.
-    return _OperationType.PREDICTION
+        signature_names = [tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
+
+    saved_model_pb = loader_impl.parse_saved_model(model_path)
+    meta_graph_def = _get_meta_graph_def(saved_model_pb, tags)
+    result = []
+    for signature_name in signature_names:
+        if signature_name in meta_graph_def.signature_def:
+            result.append(
+                _Signature(signature_name, meta_graph_def.signature_def[signature_name])
+            )
+        else:
+            raise RuntimeError(
+                "Signature %s could not be found in SavedModel" % signature_name
+            )
+    return result
 
 
-def _get_meta_graph_def(saved_model_pb: _SavedModel,
-                        tags: Sequence[Text]) -> _MetaGraphDef:
-  """Returns MetaGraphDef from SavedModel."""
+def _get_operation_type(inference_spec_type: model_spec_pb2.InferenceSpecType) -> str:
+    if _using_in_process_inference(inference_spec_type):
+        signatures = _get_signatures(
+            inference_spec_type.saved_model_spec.model_path,
+            inference_spec_type.saved_model_spec.signature_name,
+            _get_tags(inference_spec_type),
+        )
+        if not signatures:
+            raise ValueError("Model does not have valid signature to use")
+
+        if len(signatures) == 1:
+            method_name = signatures[0].signature_def.method_name
+            if method_name == tf.saved_model.CLASSIFY_METHOD_NAME:
+                return _OperationType.CLASSIFICATION
+            elif method_name == tf.saved_model.REGRESS_METHOD_NAME:
+                return _OperationType.REGRESSION
+            elif method_name == tf.saved_model.PREDICT_METHOD_NAME:
+                return _OperationType.PREDICTION
+            else:
+                raise ValueError("Unsupported signature method_name %s" % method_name)
+        else:
+            for signature in signatures:
+                method_name = signature.signature_def.method_name
+                if (
+                    method_name != tf.saved_model.CLASSIFY_METHOD_NAME
+                    and method_name != tf.saved_model.REGRESS_METHOD_NAME
+                ):
+                    raise ValueError(
+                        "Unsupported signature method_name for multi-head "
+                        "model inference: %s" % method_name
+                    )
+            return _OperationType.MULTI_INFERENCE
+    else:
+        # Remote inference supports predictions only.
+        return _OperationType.PREDICTION
 
-  for meta_graph_def in saved_model_pb.meta_graphs:
-    if set(meta_graph_def.meta_info_def.tags) == set(tags):
-      return meta_graph_def
-  raise RuntimeError('MetaGraphDef associated with tags %s could not be '
-                     'found in SavedModel' % tags)
 
+def _get_meta_graph_def(
+    saved_model_pb: _SavedModel, tags: Sequence[str]
+) -> _MetaGraphDef:
+    """Returns MetaGraphDef from SavedModel."""
+    for meta_graph_def in saved_model_pb.meta_graphs:
+        if set(meta_graph_def.meta_info_def.tags) == set(tags):
+            return meta_graph_def
+    raise RuntimeError(
+        "MetaGraphDef associated with tags %s could not be "
+        "found in SavedModel" % tags
+    )
 
-def _get_tags(
-    inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[Text]:
-  """Returns tags from ModelSpec."""
 
-  if inference_spec_type.saved_model_spec.tag:
-    return list(inference_spec_type.saved_model_spec.tag)
-  else:
-    return [tf.saved_model.SERVING]
+def _get_tags(inference_spec_type: model_spec_pb2.InferenceSpecType) -> Sequence[str]:
+    """Returns tags from ModelSpec."""
+    if inference_spec_type.saved_model_spec.tag:
+        return list(inference_spec_type.saved_model_spec.tag)
+    else:
+        return [tf.saved_model.SERVING]
 
 
-_T = TypeVar('_T')
+_T = TypeVar("_T")
 
 
 def _flatten_examples(
-    maybe_nested_examples: List[Union[_T, List[_T]]]
+    maybe_nested_examples: List[Union[_T, List[_T]]],
 ) -> Tuple[List[_T], Optional[List[int]], Optional[int]]:
-  """Flattens nested examples, and returns corresponding nested list indices."""
-  if (not maybe_nested_examples or
-      not isinstance(maybe_nested_examples[0], list)):
-    return maybe_nested_examples, None, None
-  idx = []
-  flattened = []
-  for i in range(len(maybe_nested_examples)):
-    for ex in maybe_nested_examples[i]:
-      idx.append(i)
-      flattened.append(ex)
-  return flattened, idx, len(maybe_nested_examples)
-
-
-def _nest_results(flat_results: Iterable[_T], idx: Optional[List[int]],
-                  max_idx: Optional[int]) -> List[Union[_T, List[_T]]]:
-  """Reverses operation of _flatten_examples if indices are provided."""
-  if idx is None:
-    return list(flat_results)
-  nested_results = []
-  for _ in range(max_idx):
-    nested_results.append([])
-  for result, i in zip(flat_results, idx):
-    nested_results[i].append(result)
-  return nested_results
+    """Flattens nested examples, and returns corresponding nested list indices."""
+    if not maybe_nested_examples or not isinstance(maybe_nested_examples[0], list):
+        return maybe_nested_examples, None, None
+    idx = []
+    flattened = []
+    for i in range(len(maybe_nested_examples)):
+        for ex in maybe_nested_examples[i]:
+            idx.append(i)
+            flattened.append(ex)
+    return flattened, idx, len(maybe_nested_examples)
+
+
+def _nest_results(
+    flat_results: Iterable[_T], idx: Optional[List[int]], max_idx: Optional[int]
+) -> List[Union[_T, List[_T]]]:
+    """Reverses operation of _flatten_examples if indices are provided."""
+    if idx is None:
+        return list(flat_results)
+    nested_results = []
+    for _ in range(max_idx):
+        nested_results.append([])
+    for result, i in zip(flat_results, idx):
+        nested_results[i].append(result)
+    return nested_results
 
 
 # TODO(b/231328769): Overload batch args when available.
 class _ModelHandlerWrapper(base.ModelHandler):
-  """Wrapper that handles key forwarding and pre-batching of inputs.
+    """Wrapper that handles key forwarding and pre-batching of inputs.
 
-  This wrapper accepts mapping ExampleType -> PredictType,
-  and itself maps either
+    This wrapper accepts mapping ExampleType -> PredictType,
+    and itself maps either
 
-  * ExampleType -> PredictType
+    * ExampleType -> PredictType
 
-  * Tuple[K, ExampleType] -> Tuple[K, PredictType]
+    * Tuple[K, ExampleType] -> Tuple[K, PredictType]
 
-  * Tuple[K, List[ExampleType]] -> Tuple[K, List[PredictType]]
+    * Tuple[K, List[ExampleType]] -> Tuple[K, List[PredictType]]
 
-  The second mode can support forwarding metadata with a one-to-one relationship
-  to examples, while the third supports forwarding metadata with a many-to-one
-  relationship.
+    The second mode can support forwarding metadata with a one-to-one relationship
+    to examples, while the third supports forwarding metadata with a many-to-one
+    relationship.
 
-  Note that ExampleType can not be a Tuple or a List.
-  """
+    Note that ExampleType can not be a Tuple or a List.
+    """
 
-  def __init__(self, model_handler: base.ModelHandler):
-    super().__init__()
-    self._model_handler = model_handler
+    def __init__(self, model_handler: base.ModelHandler):
+        super().__init__()
+        self._model_handler = model_handler
 
-  def load_model(self) -> Any:
-    return self._model_handler.load_model()
+    def load_model(self) -> Any:
+        return self._model_handler.load_model()
 
-  def run_inference(self,
-                    batch: Sequence[Any],
-                    model: Any,
-                    inference_args=None) -> Sequence[Any]:
-    if not batch:
-      return []
-    if isinstance(batch[0], tuple):
-      keys, examples = zip(*batch)
-    else:
-      keys, examples = None, batch
-    examples, nested_batch_idx, max_idx = _flatten_examples(examples)
-    predictions = self._model_handler.run_inference(examples, model)
-    predictions = _nest_results(predictions, nested_batch_idx, max_idx)
-    if keys:
-      return list(zip(keys, predictions))
-    return predictions
-
-  def get_num_bytes(self, batch: Any) -> int:
-    if isinstance(batch[0], tuple):
-      _, batch = zip(*batch)
-    batch, _, _ = _flatten_examples(batch)
-    return self._model_handler.get_num_bytes(batch)
-
-  def get_metrics_namespace(self) -> str:
-    return self._model_handler.get_metrics_namespace()
+    def run_inference(
+        self, batch: Sequence[Any], model: Any, inference_args=None
+    ) -> Sequence[Any]:
+        if not batch:
+            return []
+        if isinstance(batch[0], tuple):
+            keys, examples = zip(*batch)
+        else:
+            keys, examples = None, batch
+        examples, nested_batch_idx, max_idx = _flatten_examples(examples)
+        predictions = self._model_handler.run_inference(examples, model)
+        predictions = _nest_results(predictions, nested_batch_idx, max_idx)
+        if keys:
+            return list(zip(keys, predictions))
+        return predictions
+
+    def get_num_bytes(self, batch: Any) -> int:
+        if isinstance(batch[0], tuple):
+            _, batch = zip(*batch)
+        batch, _, _ = _flatten_examples(batch)
+        return self._model_handler.get_num_bytes(batch)
+
+    def get_metrics_namespace(self) -> str:
+        return self._model_handler.get_metrics_namespace()
diff --git a/tfx_bsl/beam/test_helpers.py b/tfx_bsl/beam/test_helpers.py
index 6e3f7c71..ab258206 100644
--- a/tfx_bsl/beam/test_helpers.py
+++ b/tfx_bsl/beam/test_helpers.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 #
 # Copyright 2022 Google Inc. All Rights Reserved.
 #
@@ -17,6 +16,6 @@
 
 
 def make_test_beam_pipeline_kwargs():
-  # This is kwargs for apache_beam.Pipeline's __init__, using the default runner
-  # here.
-  return {}
+    # This is kwargs for apache_beam.Pipeline's __init__, using the default runner
+    # here.
+    return {}
diff --git a/tfx_bsl/coders/batch_util.py b/tfx_bsl/coders/batch_util.py
index 08a07582..cfbcf5bb 100644
--- a/tfx_bsl/coders/batch_util.py
+++ b/tfx_bsl/coders/batch_util.py
@@ -16,9 +16,10 @@
 import inspect
 import math
 from typing import Any, Callable, Dict, List, Optional, Sequence, TypeVar
-from absl import flags
 
 import apache_beam as beam
+from absl import flags
+
 from tfx_bsl.telemetry import util as telemetry_util
 
 # Beam might grow the batch size too large for Arrow BinaryArray / ListArray
@@ -53,49 +54,49 @@
 
 
 def _UseByteSizeBatching() -> bool:
-  """Cautious access to `tfxio_use_byte_size_batching` flag value."""
-  return (
-      _USE_BYTE_SIZE_BATCHING.value
-      if flags.FLAGS.is_parsed()
-      else _USE_BYTE_SIZE_BATCHING.default
-  )
+    """Cautious access to `tfxio_use_byte_size_batching` flag value."""
+    return (
+        _USE_BYTE_SIZE_BATCHING.value
+        if flags.FLAGS.is_parsed()
+        else _USE_BYTE_SIZE_BATCHING.default
+    )
 
 
 def GetBatchElementsKwargs(
     batch_size: Optional[int], element_size_fn: Callable[[Any], int] = len
 ) -> Dict[str, Any]:
-  """Returns the kwargs to pass to beam.BatchElements()."""
-  if batch_size is not None:
-    return {
-        "min_batch_size": batch_size,
-        "max_batch_size": batch_size,
+    """Returns the kwargs to pass to beam.BatchElements()."""
+    if batch_size is not None:
+        return {
+            "min_batch_size": batch_size,
+            "max_batch_size": batch_size,
+        }
+    if _UseByteSizeBatching():
+        min_element_size = int(
+            math.ceil(_TARGET_BATCH_BYTES_SIZE / _BATCH_SIZE_CAP_WITH_BYTE_TARGET)
+        )
+        return {
+            "min_batch_size": _TARGET_BATCH_BYTES_SIZE,
+            "max_batch_size": _TARGET_BATCH_BYTES_SIZE,
+            "element_size_fn": lambda e: max(element_size_fn(e), min_element_size),
+        }
+    # Allow `BatchElements` to tune the values with the given parameters.
+    # We fix the tuning parameters here to prevent Beam changes from immediately
+    # affecting all dependencies.
+    result = {
+        "min_batch_size": 1,
+        "max_batch_size": _BATCH_SIZE_CAP,
+        "target_batch_overhead": 0.05,
+        "target_batch_duration_secs": 1,
+        "variance": 0.25,
     }
-  if _UseByteSizeBatching():
-    min_element_size = int(
-        math.ceil(_TARGET_BATCH_BYTES_SIZE / _BATCH_SIZE_CAP_WITH_BYTE_TARGET)
-    )
-    return {
-        "min_batch_size": _TARGET_BATCH_BYTES_SIZE,
-        "max_batch_size": _TARGET_BATCH_BYTES_SIZE,
-        "element_size_fn": lambda e: max(element_size_fn(e), min_element_size),
-    }
-  # Allow `BatchElements` to tune the values with the given parameters.
-  # We fix the tuning parameters here to prevent Beam changes from immediately
-  # affecting all dependencies.
-  result = {
-      "min_batch_size": 1,
-      "max_batch_size": _BATCH_SIZE_CAP,
-      "target_batch_overhead": 0.05,
-      "target_batch_duration_secs": 1,
-      "variance": 0.25,
-  }
-  batch_elements_signature = inspect.signature(beam.BatchElements)
-  if (
-      "target_batch_duration_secs_including_fixed_cost"
-      in batch_elements_signature.parameters
-  ):
-    result["target_batch_duration_secs_including_fixed_cost"] = 1
-  return result
+    batch_elements_signature = inspect.signature(beam.BatchElements)
+    if (
+        "target_batch_duration_secs_including_fixed_cost"
+        in batch_elements_signature.parameters
+    ):
+        result["target_batch_duration_secs_including_fixed_cost"] = 1
+    return result
 
 
 def _MakeAndIncrementBatchingMetrics(
@@ -103,16 +104,12 @@ def _MakeAndIncrementBatchingMetrics(
     batch_size: Optional[int],
     telemetry_descriptors: Optional[Sequence[str]],
 ) -> None:
-  """Increments metrics relevant to batching."""
-  namespace = telemetry_util.MakeTfxNamespace(
-      telemetry_descriptors or ["Unknown"]
-  )
-  beam.metrics.Metrics.counter(namespace, "tfxio_use_byte_size_batching").inc(
-      int(_UseByteSizeBatching())
-  )
-  beam.metrics.Metrics.counter(namespace, "desired_batch_size").inc(
-      batch_size or 0
-  )
+    """Increments metrics relevant to batching."""
+    namespace = telemetry_util.MakeTfxNamespace(telemetry_descriptors or ["Unknown"])
+    beam.metrics.Metrics.counter(namespace, "tfxio_use_byte_size_batching").inc(
+        int(_UseByteSizeBatching())
+    )
+    beam.metrics.Metrics.counter(namespace, "desired_batch_size").inc(batch_size or 0)
 
 
 T = TypeVar("T")
@@ -127,30 +124,32 @@ def BatchRecords(
     telemetry_descriptors: Optional[Sequence[str]],
     record_size_fn: Callable[[T], int] = len,
 ) -> beam.PCollection:
-  """Batches collection of records tuning the batch size if not provided.
-
-  Args:
-    records: A PCollection of records to batch.
-    batch_size: Desired batch size. If None, will be tuned for optimal
-      performance.
-    telemetry_descriptors: Descriptors to use for batching metrics.
-    record_size_fn: Function used to determine size of each record in bytes.
-      Only used if byte size-based batching is enabled. Defaults to `len`
-      function suitable for bytes records.
-
-  Returns:
-    A PCollection of batched records.
-  """
-  _ = (
-      records.pipeline
-      | "CreateSole" >> beam.Create([None])
-      | "IncrementMetrics"
-      >> beam.Map(
-          _MakeAndIncrementBatchingMetrics,
-          batch_size=batch_size,
-          telemetry_descriptors=telemetry_descriptors,
-      )
-  )
-  return records | "BatchElements" >> beam.BatchElements(
-      **GetBatchElementsKwargs(batch_size, record_size_fn)
-  )
+    """Batches collection of records tuning the batch size if not provided.
+
+    Args:
+    ----
+      records: A PCollection of records to batch.
+      batch_size: Desired batch size. If None, will be tuned for optimal
+        performance.
+      telemetry_descriptors: Descriptors to use for batching metrics.
+      record_size_fn: Function used to determine size of each record in bytes.
+        Only used if byte size-based batching is enabled. Defaults to `len`
+        function suitable for bytes records.
+
+    Returns:
+    -------
+      A PCollection of batched records.
+    """
+    _ = (
+        records.pipeline
+        | "CreateSole" >> beam.Create([None])
+        | "IncrementMetrics"
+        >> beam.Map(
+            _MakeAndIncrementBatchingMetrics,
+            batch_size=batch_size,
+            telemetry_descriptors=telemetry_descriptors,
+        )
+    )
+    return records | "BatchElements" >> beam.BatchElements(
+        **GetBatchElementsKwargs(batch_size, record_size_fn)
+    )
diff --git a/tfx_bsl/coders/batch_util_test.py b/tfx_bsl/coders/batch_util_test.py
index c186be45..ee9f6f1b 100644
--- a/tfx_bsl/coders/batch_util_test.py
+++ b/tfx_bsl/coders/batch_util_test.py
@@ -13,16 +13,12 @@
 # limitations under the License.
 """Tests for tfx_bsl.coders.batch_util."""
 
-import pytest
-
-from absl.testing import flagsaver
-
 import apache_beam as beam
+import pytest
+from absl.testing import absltest, flagsaver, parameterized
 from apache_beam.testing import util as beam_testing_util
 
 from tfx_bsl.coders import batch_util
-from absl.testing import absltest
-from absl.testing import parameterized
 
 _BATCH_RECORDS_TEST_CASES = (
     dict(
@@ -82,115 +78,112 @@
 
 
 class BatchUtilTest(parameterized.TestCase):
-
-  @parameterized.named_parameters(*_BATCH_RECORDS_TEST_CASES)
-  def testGetBatchElementsKwargs(
-      self,
-      batch_size,
-      tfxio_use_byte_size_batching,
-      expected_kwargs,
-      element_size_fn=len,
-      expected_element_contributions=None,
-  ):
-
-    if self._testMethodName in [
-      "testGetBatchElementsKwargsbyte_size_batching",
-      "testGetBatchElementsKwargsbyte_size_batching_with_element_size_fn",
-    ]:
-      pytest.xfail(reason="Test fails and needs to be fixed. ")
-
-    with flagsaver.flagsaver(
-        tfxio_use_byte_size_batching=tfxio_use_byte_size_batching
+    @parameterized.named_parameters(*_BATCH_RECORDS_TEST_CASES)
+    def testGetBatchElementsKwargs(
+        self,
+        batch_size,
+        tfxio_use_byte_size_batching,
+        expected_kwargs,
+        element_size_fn=len,
+        expected_element_contributions=None,
     ):
-      kwargs = batch_util.GetBatchElementsKwargs(
-          batch_size, element_size_fn=element_size_fn
-      )
-      # This parameter may not be present in some Beam versions that we support.
-      target_batch_duration_secs_including_fixed_cost = kwargs.pop(
-          "target_batch_duration_secs_including_fixed_cost", None
-      )
-      self.assertIn(target_batch_duration_secs_including_fixed_cost, {1, None})
-      if expected_kwargs.pop("element_size_fn", None) is not None:
-        self.assertIn("element_size_fn", kwargs)
-        element_size_fn = kwargs.pop("element_size_fn")
-        for (
-            element,
-            expected_contribution,
-        ) in expected_element_contributions.items():
-          self.assertEqual(
-              element_size_fn(element),
-              expected_contribution,
-              msg=f"Unexpected contribution of element {element}",
-          )
-      self.assertDictEqual(kwargs, expected_kwargs)
-
-  @parameterized.named_parameters(*_BATCH_RECORDS_TEST_CASES)
-  def testBatchRecords(
-      self,
-      batch_size,
-      tfxio_use_byte_size_batching,
-      expected_kwargs,
-      element_size_fn=len,
-      expected_element_contributions=None,
-  ):
-
-    if self._testMethodName in [
-      "testBatchRecordsbatch_size_none",
-      "testBatchRecordsbyte_size_batching",
-      "testBatchRecordsbyte_size_batching_with_element_size_fn",
-      "testBatchRecordsfixed_batch_size",
-      "testBatchRecordsfixed_batch_size_byte_size_batching",
-    ]:
-      pytest.xfail(reason="PR 260 81 test fails and needs to be fixed. ")
-
-    del expected_kwargs
-    telemetry_descriptors = ["TestComponent"]
-    input_records = (
-        [b"asd", b"asds", b"123", b"gdgd" * 1000]
-        if expected_element_contributions is None
-        else expected_element_contributions.keys()
-    )
-
-    def AssertFn(batched_records):
-      # We can't validate the actual sizes since they depend on test
-      # environment.
-      self.assertNotEmpty(batched_records)
-      for batch in batched_records:
-        self.assertIsInstance(batch, list)
-        self.assertNotEmpty(batch)
-
-    with flagsaver.flagsaver(
-        tfxio_use_byte_size_batching=tfxio_use_byte_size_batching
+        if self._testMethodName in [
+            "testGetBatchElementsKwargsbyte_size_batching",
+            "testGetBatchElementsKwargsbyte_size_batching_with_element_size_fn",
+        ]:
+            pytest.xfail(reason="Test fails and needs to be fixed. ")
+
+        with flagsaver.flagsaver(
+            tfxio_use_byte_size_batching=tfxio_use_byte_size_batching
+        ):
+            kwargs = batch_util.GetBatchElementsKwargs(
+                batch_size, element_size_fn=element_size_fn
+            )
+            # This parameter may not be present in some Beam versions that we support.
+            target_batch_duration_secs_including_fixed_cost = kwargs.pop(
+                "target_batch_duration_secs_including_fixed_cost", None
+            )
+            self.assertIn(target_batch_duration_secs_including_fixed_cost, {1, None})
+            if expected_kwargs.pop("element_size_fn", None) is not None:
+                self.assertIn("element_size_fn", kwargs)
+                element_size_fn = kwargs.pop("element_size_fn")
+                for (
+                    element,
+                    expected_contribution,
+                ) in expected_element_contributions.items():
+                    self.assertEqual(
+                        element_size_fn(element),
+                        expected_contribution,
+                        msg=f"Unexpected contribution of element {element}",
+                    )
+            self.assertDictEqual(kwargs, expected_kwargs)
+
+    @parameterized.named_parameters(*_BATCH_RECORDS_TEST_CASES)
+    def testBatchRecords(
+        self,
+        batch_size,
+        tfxio_use_byte_size_batching,
+        expected_kwargs,
+        element_size_fn=len,
+        expected_element_contributions=None,
     ):
-      p = beam.Pipeline()
-      batched_records_pcoll = (
-          p
-          | beam.Create(input_records)
-          | batch_util.BatchRecords(
-              batch_size, telemetry_descriptors, record_size_fn=element_size_fn
-          )
-      )
-      beam_testing_util.assert_that(batched_records_pcoll, AssertFn)
-      pipeline_result = p.run()
-      pipeline_result.wait_until_finish()
-      all_metrics = pipeline_result.metrics()
-      maintained_metrics = all_metrics.query(
-          beam.metrics.metric.MetricsFilter().with_namespace(
-              "tfx." + ".".join(telemetry_descriptors)
-          )
-      )
-      self.assertIsNotNone(maintained_metrics)
-      counters = maintained_metrics[beam.metrics.metric.MetricResults.COUNTERS]
-      self.assertLen(counters, 2)
-      expected_counters = {
-          "tfxio_use_byte_size_batching": int(tfxio_use_byte_size_batching),
-          "desired_batch_size": batch_size or 0,
-      }
-      for counter in counters:
-        self.assertEqual(
-            counter.result, expected_counters[counter.key.metric.name]
+        if self._testMethodName in [
+            "testBatchRecordsbatch_size_none",
+            "testBatchRecordsbyte_size_batching",
+            "testBatchRecordsbyte_size_batching_with_element_size_fn",
+            "testBatchRecordsfixed_batch_size",
+            "testBatchRecordsfixed_batch_size_byte_size_batching",
+        ]:
+            pytest.xfail(reason="PR 260 81 test fails and needs to be fixed. ")
+
+        del expected_kwargs
+        telemetry_descriptors = ["TestComponent"]
+        input_records = (
+            [b"asd", b"asds", b"123", b"gdgd" * 1000]
+            if expected_element_contributions is None
+            else expected_element_contributions.keys()
         )
 
+        def AssertFn(batched_records):
+            # We can't validate the actual sizes since they depend on test
+            # environment.
+            self.assertNotEmpty(batched_records)
+            for batch in batched_records:
+                self.assertIsInstance(batch, list)
+                self.assertNotEmpty(batch)
+
+        with flagsaver.flagsaver(
+            tfxio_use_byte_size_batching=tfxio_use_byte_size_batching
+        ):
+            p = beam.Pipeline()
+            batched_records_pcoll = (
+                p
+                | beam.Create(input_records)
+                | batch_util.BatchRecords(
+                    batch_size, telemetry_descriptors, record_size_fn=element_size_fn
+                )
+            )
+            beam_testing_util.assert_that(batched_records_pcoll, AssertFn)
+            pipeline_result = p.run()
+            pipeline_result.wait_until_finish()
+            all_metrics = pipeline_result.metrics()
+            maintained_metrics = all_metrics.query(
+                beam.metrics.metric.MetricsFilter().with_namespace(
+                    "tfx." + ".".join(telemetry_descriptors)
+                )
+            )
+            self.assertIsNotNone(maintained_metrics)
+            counters = maintained_metrics[beam.metrics.metric.MetricResults.COUNTERS]
+            self.assertLen(counters, 2)
+            expected_counters = {
+                "tfxio_use_byte_size_batching": int(tfxio_use_byte_size_batching),
+                "desired_batch_size": batch_size or 0,
+            }
+            for counter in counters:
+                self.assertEqual(
+                    counter.result, expected_counters[counter.key.metric.name]
+                )
+
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/coders/csv_decoder.py b/tfx_bsl/coders/csv_decoder.py
index efd6becf..d6fc089d 100644
--- a/tfx_bsl/coders/csv_decoder.py
+++ b/tfx_bsl/coders/csv_decoder.py
@@ -16,375 +16,422 @@
 
 import csv
 import enum
-from typing import Any, Callable, Dict, Iterable, List, NamedTuple, Optional, Set, Text, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    List,
+    NamedTuple,
+    Optional,
+    Set,
+    Text,
+    Tuple,
+    Union,
+)
 
 import apache_beam as beam
 import numpy as np
 import pyarrow as pa
-from tfx_bsl.coders import batch_util
+from tensorflow_metadata.proto.v0 import schema_pb2, statistics_pb2
 
-from tensorflow_metadata.proto.v0 import schema_pb2
-from tensorflow_metadata.proto.v0 import statistics_pb2
+from tfx_bsl.coders import batch_util
 
 PARSE_CSV_LINE_YIELDS_RAW_RECORDS = True
 
 CSVCell = bytes
-CSVLine = Text
-ColumnName = Text
+CSVLine = str
+ColumnName = str
 
 
 class ColumnType(enum.IntEnum):
-  """Enum for the type of a CSV column."""
-  # column will not be in the result RecordBatch
-  IGNORE = -2
-  # column will be in the result RecordBatch but will be of Null type (which
-  # means this column contains only empty value).
-  UNKNOWN = -1
-  INT = statistics_pb2.FeatureNameStatistics.INT
-  FLOAT = statistics_pb2.FeatureNameStatistics.FLOAT
-  STRING = statistics_pb2.FeatureNameStatistics.STRING
-
-  # We need the following to hold for type inference to work.
-  assert UNKNOWN < INT
-  assert INT < FLOAT
-  assert FLOAT < STRING
-
-
-ColumnInfo = NamedTuple("ColumnInfo", [
-    ("name", ColumnName),
-    ("type", ColumnType),
-])
+    """Enum for the type of a CSV column."""
+
+    # column will not be in the result RecordBatch
+    IGNORE = -2
+    # column will be in the result RecordBatch but will be of Null type (which
+    # means this column contains only empty value).
+    UNKNOWN = -1
+    INT = statistics_pb2.FeatureNameStatistics.INT
+    FLOAT = statistics_pb2.FeatureNameStatistics.FLOAT
+    STRING = statistics_pb2.FeatureNameStatistics.STRING
+
+    # We need the following to hold for type inference to work.
+    assert UNKNOWN < INT
+    assert INT < FLOAT
+    assert FLOAT < STRING
+
+
+class ColumnInfo(NamedTuple):
+    name: ColumnName
+    type: ColumnType
+
 
 _SCHEMA_TYPE_TO_COLUMN_TYPE = {
     schema_pb2.INT: ColumnType.INT,
     schema_pb2.FLOAT: ColumnType.FLOAT,
-    schema_pb2.BYTES: ColumnType.STRING
+    schema_pb2.BYTES: ColumnType.STRING,
 }
 
 _FEATURE_TYPE_TO_ARROW_TYPE = {
-        ColumnType.UNKNOWN: pa.null(),
-        ColumnType.INT: pa.large_list(pa.int64()),
-        ColumnType.FLOAT: pa.large_list(pa.float32()),
-        ColumnType.STRING: pa.large_list(pa.large_binary())
+    ColumnType.UNKNOWN: pa.null(),
+    ColumnType.INT: pa.large_list(pa.int64()),
+    ColumnType.FLOAT: pa.large_list(pa.float32()),
+    ColumnType.STRING: pa.large_list(pa.large_binary()),
 }
 
 
 @beam.ptransform_fn
 @beam.typehints.with_input_types(CSVLine)
 @beam.typehints.with_output_types(pa.RecordBatch)
-def CSVToRecordBatch(lines: beam.pvalue.PCollection,
-                     column_names: List[Text],
-                     desired_batch_size: Optional[int],
-                     delimiter: Text = ",",
-                     skip_blank_lines: bool = True,
-                     schema: Optional[schema_pb2.Schema] = None,
-                     multivalent_columns: Optional[List[Text]] = None,
-                     secondary_delimiter: Optional[Text] = None,
-                     raw_record_column_name: Optional[Text] = None):
-  """Decodes CSV records into Arrow RecordBatches.
-
-  Args:
-    lines: The pcollection of raw records (csv lines).
-    column_names: List of feature names. Order must match the order in the CSV
-      file.
-    desired_batch_size: Batch size. The output Arrow RecordBatches will have as
-      many rows as the `desired_batch_size`. If None, the batch size is auto
-      tuned by beam.
-    delimiter: A one-character string used to separate fields.
-    skip_blank_lines: A boolean to indicate whether to skip over blank lines
-      rather than interpreting them as missing values.
-    schema: An optional schema of the input data. If this is provided, it must
-      contain a subset of columns in `column_names`. If a feature is in
-      `column_names` but not in the schema, it won't be in the result
-      RecordBatch.
-    multivalent_columns: Columns that can contain multiple values. If
-      secondary_delimiter is provided, this must also be provided.
-    secondary_delimiter: Delimiter used for parsing multivalent columns. If
-      multivalent_columns is provided, this must also be provided.
-    raw_record_column_name: Optional name for a column containing the raw csv
-      lines. If this is None, then this column will not be produced. This will
-      always be the last column in the record batch.
-
-  Returns:
-    RecordBatches of the CSV lines.
-
-  Raises:
-    ValueError:
-      * If the columns do not match the specified csv headers.
-      * If the schema has invalid feature types.
-      * If the schema does not contain all columns.
-      * If raw_record_column_name exists in column_names
-  """
-  if (raw_record_column_name is not None and
-      raw_record_column_name in column_names):
-    raise ValueError(
-        "raw_record_column_name: {} is already an existing column name. "
-        "Please choose a different name.".format(raw_record_column_name))
-
-  csv_lines_and_raw_records = (
-      lines | "ParseCSVLines" >> beam.ParDo(ParseCSVLine(delimiter)))
-
-  if schema is not None:
-    column_infos = _GetColumnInfosFromSchema(schema, column_names)
-  else:
-    # TODO(b/72746442): Consider using a DeepCopy optimization similar to TFT.
-    # Do first pass to infer the feature types.
-    column_infos = beam.pvalue.AsSingleton(
+def CSVToRecordBatch(
+    lines: beam.pvalue.PCollection,
+    column_names: List[str],
+    desired_batch_size: Optional[int],
+    delimiter: str = ",",
+    skip_blank_lines: bool = True,
+    schema: Optional[schema_pb2.Schema] = None,
+    multivalent_columns: Optional[List[str]] = None,
+    secondary_delimiter: Optional[str] = None,
+    raw_record_column_name: Optional[str] = None,
+):
+    """Decodes CSV records into Arrow RecordBatches.
+
+    Args:
+    ----
+      lines: The pcollection of raw records (csv lines).
+      column_names: List of feature names. Order must match the order in the CSV
+        file.
+      desired_batch_size: Batch size. The output Arrow RecordBatches will have as
+        many rows as the `desired_batch_size`. If None, the batch size is auto
+        tuned by beam.
+      delimiter: A one-character string used to separate fields.
+      skip_blank_lines: A boolean to indicate whether to skip over blank lines
+        rather than interpreting them as missing values.
+      schema: An optional schema of the input data. If this is provided, it must
+        contain a subset of columns in `column_names`. If a feature is in
+        `column_names` but not in the schema, it won't be in the result
+        RecordBatch.
+      multivalent_columns: Columns that can contain multiple values. If
+        secondary_delimiter is provided, this must also be provided.
+      secondary_delimiter: Delimiter used for parsing multivalent columns. If
+        multivalent_columns is provided, this must also be provided.
+      raw_record_column_name: Optional name for a column containing the raw csv
+        lines. If this is None, then this column will not be produced. This will
+        always be the last column in the record batch.
+
+    Returns:
+    -------
+      RecordBatches of the CSV lines.
+
+    Raises:
+    ------
+      ValueError:
+        * If the columns do not match the specified csv headers.
+        * If the schema has invalid feature types.
+        * If the schema does not contain all columns.
+        * If raw_record_column_name exists in column_names
+    """
+    if raw_record_column_name is not None and raw_record_column_name in column_names:
+        raise ValueError(
+            f"raw_record_column_name: {raw_record_column_name} is already an existing column name. "
+            "Please choose a different name."
+        )
+
+    csv_lines_and_raw_records = lines | "ParseCSVLines" >> beam.ParDo(
+        ParseCSVLine(delimiter)
+    )
+
+    if schema is not None:
+        column_infos = _GetColumnInfosFromSchema(schema, column_names)
+    else:
+        # TODO(b/72746442): Consider using a DeepCopy optimization similar to TFT.
+        # Do first pass to infer the feature types.
+        column_infos = beam.pvalue.AsSingleton(
+            csv_lines_and_raw_records
+            | "ExtractParsedCSVLines" >> beam.Keys()
+            | "InferColumnTypes"
+            >> beam.CombineGlobally(
+                ColumnTypeInferrer(
+                    column_names=column_names,
+                    skip_blank_lines=skip_blank_lines,
+                    multivalent_columns=multivalent_columns,
+                    secondary_delimiter=secondary_delimiter,
+                )
+            )
+        )
+
+    # Do second pass to generate the RecordBatches.
+    return (
         csv_lines_and_raw_records
-        | "ExtractParsedCSVLines" >> beam.Keys()
-        | "InferColumnTypes" >> beam.CombineGlobally(
-            ColumnTypeInferrer(
-                column_names=column_names,
+        | "BatchCSVLines"
+        >> batch_util.BatchRecords(
+            desired_batch_size,
+            telemetry_descriptors=["CSVToRecordBatch"],
+            # The elements are tuples of parsed and unparsed CSVlines.
+            record_size_fn=lambda kv: len(kv[1]) << 1,
+        )
+        | "BatchedCSVRowsToArrow"
+        >> beam.ParDo(
+            BatchedCSVRowsToRecordBatch(
                 skip_blank_lines=skip_blank_lines,
                 multivalent_columns=multivalent_columns,
-                secondary_delimiter=secondary_delimiter)))
-
-  # Do second pass to generate the RecordBatches.
-  return (
-      csv_lines_and_raw_records
-      | "BatchCSVLines"
-      >> batch_util.BatchRecords(
-          desired_batch_size,
-          telemetry_descriptors=["CSVToRecordBatch"],
-          # The elements are tuples of parsed and unparsed CSVlines.
-          record_size_fn=lambda kv: len(kv[1]) << 1,
-      )
-      | "BatchedCSVRowsToArrow"
-      >> beam.ParDo(
-          BatchedCSVRowsToRecordBatch(
-              skip_blank_lines=skip_blank_lines,
-              multivalent_columns=multivalent_columns,
-              secondary_delimiter=secondary_delimiter,
-              raw_record_column_name=raw_record_column_name,
-          ),
-          column_infos,
-      )
-  )
+                secondary_delimiter=secondary_delimiter,
+                raw_record_column_name=raw_record_column_name,
+            ),
+            column_infos,
+        )
+    )
 
 
 @beam.typehints.with_input_types(CSVLine)
 @beam.typehints.with_output_types(Tuple[List[CSVCell], CSVLine])
 class ParseCSVLine(beam.DoFn):
-  """A beam.DoFn to parse CSVLines into Tuple(List[CSVCell], CSVLine).
+    """A beam.DoFn to parse CSVLines into Tuple(List[CSVCell], CSVLine).
 
-  The CSVLine is the raw csv row. The raw csv row will always be output.
-  """
+    The CSVLine is the raw csv row. The raw csv row will always be output.
+    """
 
-  def __init__(self, delimiter: Text):
-    self._delimiter = delimiter
-    self._reader = None
+    def __init__(self, delimiter: str):
+        self._delimiter = delimiter
+        self._reader = None
 
-  def setup(self):
-    self._reader = _CSVRecordReader(self._delimiter)
+    def setup(self):
+        self._reader = _CSVRecordReader(self._delimiter)
 
-  def process(self,
-              csv_line: CSVLine) -> Iterable[Tuple[List[CSVCell], CSVLine]]:
-    assert self._reader is not None, "Reader uninitialized. Call setup() first."
-    line = self._reader.ReadLine(csv_line)
-    yield (line, csv_line)
+    def process(self, csv_line: CSVLine) -> Iterable[Tuple[List[CSVCell], CSVLine]]:
+        assert self._reader is not None, "Reader uninitialized. Call setup() first."
+        line = self._reader.ReadLine(csv_line)
+        yield (line, csv_line)
 
 
 @beam.typehints.with_input_types(List[CSVCell])
 @beam.typehints.with_output_types(List[ColumnInfo])
 class ColumnTypeInferrer(beam.CombineFn):
-  """A beam.CombineFn to infer CSV Column types.
-
-  Its input can be produced by ParseCSVLine().
-  """
-
-  def __init__(
-      self,
-      column_names: List[ColumnName],
-      skip_blank_lines: bool,
-      multivalent_columns: Optional[Set[ColumnName]] = None,
-      secondary_delimiter: Optional[Text] = None) -> None:
-    """Initializes a feature type inferrer combiner."""
-    self._column_names = column_names
-    self._skip_blank_lines = skip_blank_lines
-    self._multivalent_columns = (
-        multivalent_columns if multivalent_columns is not None else set())
-    if multivalent_columns:
-      assert secondary_delimiter, ("secondary_delimiter must be specified if "
-                                   "there are multivalent columns")
-      self._multivalent_reader = _CSVRecordReader(secondary_delimiter)
-
-  def create_accumulator(self) -> Dict[ColumnName, ColumnType]:
-    """Creates an empty accumulator to keep track of the feature types."""
-    return {}
-
-  def add_input(self, accumulator: Dict[ColumnName, ColumnType],
-                cells: List[CSVCell]) -> Dict[ColumnName, ColumnType]:
-    """Updates the feature types in the accumulator using the input row.
+    """A beam.CombineFn to infer CSV Column types.
 
-    Args:
-      accumulator: A dict containing the already inferred feature types.
-      cells: A list containing feature values of a CSV record.
-
-    Returns:
-      A dict containing the updated feature types based on input row.
-
-    Raises:
-      ValueError: If the columns do not match the specified csv headers.
+    Its input can be produced by ParseCSVLine().
     """
-    # If the row is empty and we don't want to skip blank lines,
-    # add an empty string to each column.
-    if not cells and not self._skip_blank_lines:
-      cells = ["" for _ in range(len(self._column_names))]
-    elif cells and len(cells) != len(self._column_names):
-      raise ValueError("Columns do not match specified csv headers: %s -> %s" %
-                       (self._column_names, cells))
-
-    # Iterate over each feature value and update the type.
-    for column_name, cell in zip(self._column_names, cells):
-
-      # Get the already inferred type of the feature.
-      previous_type = accumulator.get(column_name, None)
-      if column_name in self._multivalent_columns:
-        # the reader only accepts str but v is bytes.
-        values = self._multivalent_reader.ReadLine(cell.decode())  # pytype: disable=attribute-error  # trace-all-classes
-        current_type = max([_InferValueType(value) for value in values
-                           ]) if values else ColumnType.UNKNOWN
-      else:
-        current_type = _InferValueType(cell)
-
-      # If the type inferred from the current value is higher in the type
-      # hierarchy compared to the already inferred type, we update the type.
-      # The type hierarchy is,
-      #   INT (level 0) --> FLOAT (level 1) --> STRING (level 2)
-      if previous_type is None or current_type > previous_type:
-        accumulator[column_name] = current_type
-    return accumulator
-
-  def merge_accumulators(
-      self, accumulators: List[Dict[ColumnName, ColumnType]]
-  ) -> Dict[ColumnName, ColumnType]:
-    """Merge the feature types inferred from the different partitions.
 
-    Args:
-      accumulators: A list of dicts containing the feature types inferred from
-        the different partitions of the data.
-
-    Returns:
-      A dict containing the merged feature types.
-    """
-    result = {}
-    for shard_types in accumulators:
-      # Merge the types inferred in each partition using the type hierarchy.
-      # Specifically, whenever we observe a type higher in the type hierarchy
-      # we update the type.
-      for feature_name, feature_type in shard_types.items():
-        if feature_name not in result or feature_type > result[feature_name]:
-          result[feature_name] = feature_type
-    return result
-
-  def extract_output(
-      self, accumulator: Dict[ColumnName, ColumnType]) -> List[ColumnInfo]:
-    """Return a list of tuples containing the column info."""
-    return [
-        ColumnInfo(col_name, accumulator.get(col_name, ColumnType.UNKNOWN))
-        for col_name in self._column_names
-    ]
-
-
-@beam.typehints.with_input_types(
-    List[Tuple[List[CSVCell], CSVLine]],
-    List[ColumnInfo])
+    def __init__(
+        self,
+        column_names: List[ColumnName],
+        skip_blank_lines: bool,
+        multivalent_columns: Optional[Set[ColumnName]] = None,
+        secondary_delimiter: Optional[str] = None,
+    ) -> None:
+        """Initializes a feature type inferrer combiner."""
+        self._column_names = column_names
+        self._skip_blank_lines = skip_blank_lines
+        self._multivalent_columns = (
+            multivalent_columns if multivalent_columns is not None else set()
+        )
+        if multivalent_columns:
+            assert secondary_delimiter, (
+                "secondary_delimiter must be specified if "
+                "there are multivalent columns"
+            )
+            self._multivalent_reader = _CSVRecordReader(secondary_delimiter)
+
+    def create_accumulator(self) -> Dict[ColumnName, ColumnType]:
+        """Creates an empty accumulator to keep track of the feature types."""
+        return {}
+
+    def add_input(
+        self, accumulator: Dict[ColumnName, ColumnType], cells: List[CSVCell]
+    ) -> Dict[ColumnName, ColumnType]:
+        """Updates the feature types in the accumulator using the input row.
+
+        Args:
+        ----
+          accumulator: A dict containing the already inferred feature types.
+          cells: A list containing feature values of a CSV record.
+
+        Returns:
+        -------
+          A dict containing the updated feature types based on input row.
+
+        Raises:
+        ------
+          ValueError: If the columns do not match the specified csv headers.
+        """
+        # If the row is empty and we don't want to skip blank lines,
+        # add an empty string to each column.
+        if not cells and not self._skip_blank_lines:
+            cells = ["" for _ in range(len(self._column_names))]
+        elif cells and len(cells) != len(self._column_names):
+            raise ValueError(
+                "Columns do not match specified csv headers: %s -> %s"
+                % (self._column_names, cells)
+            )
+
+        # Iterate over each feature value and update the type.
+        for column_name, cell in zip(self._column_names, cells):
+            # Get the already inferred type of the feature.
+            previous_type = accumulator.get(column_name)
+            if column_name in self._multivalent_columns:
+                # the reader only accepts str but v is bytes.
+                values = self._multivalent_reader.ReadLine(
+                    cell.decode()
+                )  # pytype: disable=attribute-error  # trace-all-classes
+                current_type = (
+                    max([_InferValueType(value) for value in values])
+                    if values
+                    else ColumnType.UNKNOWN
+                )
+            else:
+                current_type = _InferValueType(cell)
+
+            # If the type inferred from the current value is higher in the type
+            # hierarchy compared to the already inferred type, we update the type.
+            # The type hierarchy is,
+            #   INT (level 0) --> FLOAT (level 1) --> STRING (level 2)
+            if previous_type is None or current_type > previous_type:
+                accumulator[column_name] = current_type
+        return accumulator
+
+    def merge_accumulators(
+        self, accumulators: List[Dict[ColumnName, ColumnType]]
+    ) -> Dict[ColumnName, ColumnType]:
+        """Merge the feature types inferred from the different partitions.
+
+        Args:
+        ----
+          accumulators: A list of dicts containing the feature types inferred from
+            the different partitions of the data.
+
+        Returns:
+        -------
+          A dict containing the merged feature types.
+        """
+        result = {}
+        for shard_types in accumulators:
+            # Merge the types inferred in each partition using the type hierarchy.
+            # Specifically, whenever we observe a type higher in the type hierarchy
+            # we update the type.
+            for feature_name, feature_type in shard_types.items():
+                if feature_name not in result or feature_type > result[feature_name]:
+                    result[feature_name] = feature_type
+        return result
+
+    def extract_output(
+        self, accumulator: Dict[ColumnName, ColumnType]
+    ) -> List[ColumnInfo]:
+        """Return a list of tuples containing the column info."""
+        return [
+            ColumnInfo(col_name, accumulator.get(col_name, ColumnType.UNKNOWN))
+            for col_name in self._column_names
+        ]
+
+
+@beam.typehints.with_input_types(List[Tuple[List[CSVCell], CSVLine]], List[ColumnInfo])
 @beam.typehints.with_output_types(pa.RecordBatch)
 class BatchedCSVRowsToRecordBatch(beam.DoFn):
-  """DoFn to convert a batch of csv rows to a RecordBatch."""
-
-  def __init__(self,
-               skip_blank_lines: bool,
-               multivalent_columns: Optional[Set[ColumnName]] = None,
-               secondary_delimiter: Optional[Text] = None,
-               raw_record_column_name: Optional[Text] = None):
-    self._skip_blank_lines = skip_blank_lines
-    self._multivalent_columns = (
-        multivalent_columns if multivalent_columns is not None else set())
-    if multivalent_columns:
-      assert secondary_delimiter, ("secondary_delimiter must be specified if "
-                                   "there are multivalent columns")
-      self._multivalent_reader = _CSVRecordReader(secondary_delimiter)
-    self._raw_record_column_name = raw_record_column_name
-    self._raw_record_column_type = _FEATURE_TYPE_TO_ARROW_TYPE.get(
-        ColumnType.STRING)
-
-    # Note that len(_column_handlers) == len(column_infos) but
-    # len(_column_names) and len(_column_arrow_types) may not equal to that,
-    # because columns of type IGNORE are not there.
-    self._column_handlers = None
-    self._column_names = []
-    self._column_arrow_types = None
-
-  def _get_column_handler(
-      self, column_info: ColumnInfo
-  ) -> Optional[Callable[[CSVCell], Optional[Iterable[Union[int, float,
-                                                            bytes]]]]]:
-    if column_info.type == ColumnType.IGNORE:
-      return None
-    value_converter = _VALUE_CONVERTER_MAP.get(column_info.type)
-    assert value_converter is not None
-    if column_info.name in self._multivalent_columns:
-      # If the column is multivalent and unknown, we treat it as a univalent
-      # column. This will result in a null array instead of a list<null>", as
-      # TFDV does not support list<null>.
-      if column_info.type is ColumnType.UNKNOWN:
-        return lambda v: None
-      return lambda v: [  # pylint: disable=g-long-lambda
-          value_converter(sub_v)
-          # the reader only accepts str but v is bytes.
-          for sub_v in self._multivalent_reader.ReadLine(v.decode())
-      ]
-    else:
-      return lambda v: (value_converter(v),)
-
-  def _process_column_infos(self, column_infos: List[ColumnInfo]):
-    self._column_handlers = [self._get_column_handler(c) for c in column_infos]
-    self._column_arrow_types = [
-        _FEATURE_TYPE_TO_ARROW_TYPE.get(c.type)
-        for c in column_infos
-        if c.type != ColumnType.IGNORE
-    ]
-    self._column_names = [
-        c.name for c in column_infos if c.type != ColumnType.IGNORE]
-
-  def process(self, batch_of_tuple: List[Tuple[List[CSVCell], CSVLine]],
-              column_infos: List[ColumnInfo]) -> Iterable[pa.RecordBatch]:
-    if self._column_handlers is None:
-      self._process_column_infos(column_infos)
-
-    raw_records = []
-    values_list_by_column = [[] for _ in self._column_names]
-    for (csv_row, raw_record) in batch_of_tuple:
-      if not csv_row:
-        if not self._skip_blank_lines:
-          for l in values_list_by_column:
-            l.append(None)
-        continue
-      if len(csv_row) != len(self._column_handlers):
-        raise ValueError(
-            "Encountered a row of unexpected number of columns: {} vs. {}"
-            .format(len(csv_row), len(self._column_handlers)))
-      column_idx = 0
-      for csv_cell, handler in zip(csv_row, self._column_handlers):
-        if handler is None:
-          continue
-        values_list_by_column[column_idx].append(
-            handler(csv_cell) if csv_cell else None)
-        column_idx += 1
-      if self._raw_record_column_name is not None:
-        raw_records.append([raw_record])
-
-    arrow_arrays = [
-        pa.array(l, type=t)
-        for l, t in zip(values_list_by_column, self._column_arrow_types)
-    ]
-
-    if self._raw_record_column_name is not None:
-      arrow_arrays.append(
-          pa.array(raw_records, type=self._raw_record_column_type))
-      self._column_names.append(self._raw_record_column_name)
-    yield pa.RecordBatch.from_arrays(arrow_arrays, self._column_names)
+    """DoFn to convert a batch of csv rows to a RecordBatch."""
+
+    def __init__(
+        self,
+        skip_blank_lines: bool,
+        multivalent_columns: Optional[Set[ColumnName]] = None,
+        secondary_delimiter: Optional[str] = None,
+        raw_record_column_name: Optional[str] = None,
+    ):
+        self._skip_blank_lines = skip_blank_lines
+        self._multivalent_columns = (
+            multivalent_columns if multivalent_columns is not None else set()
+        )
+        if multivalent_columns:
+            assert secondary_delimiter, (
+                "secondary_delimiter must be specified if "
+                "there are multivalent columns"
+            )
+            self._multivalent_reader = _CSVRecordReader(secondary_delimiter)
+        self._raw_record_column_name = raw_record_column_name
+        self._raw_record_column_type = _FEATURE_TYPE_TO_ARROW_TYPE.get(
+            ColumnType.STRING
+        )
+
+        # Note that len(_column_handlers) == len(column_infos) but
+        # len(_column_names) and len(_column_arrow_types) may not equal to that,
+        # because columns of type IGNORE are not there.
+        self._column_handlers = None
+        self._column_names = []
+        self._column_arrow_types = None
+
+    def _get_column_handler(
+        self, column_info: ColumnInfo
+    ) -> Optional[Callable[[CSVCell], Optional[Iterable[Union[int, float, bytes]]]]]:
+        if column_info.type == ColumnType.IGNORE:
+            return None
+        value_converter = _VALUE_CONVERTER_MAP.get(column_info.type)
+        assert value_converter is not None
+        if column_info.name in self._multivalent_columns:
+            # If the column is multivalent and unknown, we treat it as a univalent
+            # column. This will result in a null array instead of a list<null>", as
+            # TFDV does not support list<null>.
+            if column_info.type is ColumnType.UNKNOWN:
+                return lambda v: None
+            return lambda v: [  # pylint: disable=g-long-lambda
+                value_converter(sub_v)
+                # the reader only accepts str but v is bytes.
+                for sub_v in self._multivalent_reader.ReadLine(v.decode())
+            ]
+        else:
+            return lambda v: (value_converter(v),)
+
+    def _process_column_infos(self, column_infos: List[ColumnInfo]):
+        self._column_handlers = [self._get_column_handler(c) for c in column_infos]
+        self._column_arrow_types = [
+            _FEATURE_TYPE_TO_ARROW_TYPE.get(c.type)
+            for c in column_infos
+            if c.type != ColumnType.IGNORE
+        ]
+        self._column_names = [
+            c.name for c in column_infos if c.type != ColumnType.IGNORE
+        ]
+
+    def process(
+        self,
+        batch_of_tuple: List[Tuple[List[CSVCell], CSVLine]],
+        column_infos: List[ColumnInfo],
+    ) -> Iterable[pa.RecordBatch]:
+        if self._column_handlers is None:
+            self._process_column_infos(column_infos)
+
+        raw_records = []
+        values_list_by_column = [[] for _ in self._column_names]
+        for csv_row, raw_record in batch_of_tuple:
+            if not csv_row:
+                if not self._skip_blank_lines:
+                    for l in values_list_by_column:
+                        l.append(None)
+                continue
+            if len(csv_row) != len(self._column_handlers):
+                raise ValueError(
+                    f"Encountered a row of unexpected number of columns: {len(csv_row)} vs. {len(self._column_handlers)}"
+                )
+            column_idx = 0
+            for csv_cell, handler in zip(csv_row, self._column_handlers):
+                if handler is None:
+                    continue
+                values_list_by_column[column_idx].append(
+                    handler(csv_cell) if csv_cell else None
+                )
+                column_idx += 1
+            if self._raw_record_column_name is not None:
+                raw_records.append([raw_record])
+
+        arrow_arrays = [
+            pa.array(l, type=t)
+            for l, t in zip(values_list_by_column, self._column_arrow_types)
+        ]
+
+        if self._raw_record_column_name is not None:
+            arrow_arrays.append(
+                pa.array(raw_records, type=self._raw_record_column_type)
+            )
+            self._column_names.append(self._raw_record_column_name)
+        yield pa.RecordBatch.from_arrays(arrow_arrays, self._column_names)
 
 
 _VALUE_CONVERTER_MAP = {
@@ -395,93 +442,100 @@ def process(self, batch_of_tuple: List[Tuple[List[CSVCell], CSVLine]],
 }
 
 
-def GetArrowSchema(column_names: List[Text],
-                   schema: schema_pb2.Schema,
-                   raw_record_column_name: Optional[Text] = None) -> pa.Schema:
-  """Returns the arrow schema given columns and a TFMD schema.
+def GetArrowSchema(
+    column_names: List[str],
+    schema: schema_pb2.Schema,
+    raw_record_column_name: Optional[str] = None,
+) -> pa.Schema:
+    """Returns the arrow schema given columns and a TFMD schema.
 
-  Args:
-    column_names: List of feature names. This must match the features in schema.
-    schema: The schema proto to base the arrow schema from.
-    raw_record_column_name: An optional name for the column containing raw
-     records. If this is not set, the arrow schema will not contain a raw
-     records column.
+    Args:
+    ----
+      column_names: List of feature names. This must match the features in schema.
+      schema: The schema proto to base the arrow schema from.
+      raw_record_column_name: An optional name for the column containing raw
+       records. If this is not set, the arrow schema will not contain a raw
+       records column.
 
-  Returns:
-    Arrow Schema based on the provided schema proto.
+    Returns:
+    -------
+      Arrow Schema based on the provided schema proto.
 
-  Raises:
-    ValueError:
-     * If the schema contains a feature that does not exist in `column_names`.
-     * If the feature type does not map to an arrow type.
-     * If raw_record_column_name exists in column_names
-  """
-  schema_feature_names = [f.name for f in schema.feature]
-  if not set(schema_feature_names).issubset(set(column_names)):
-    raise ValueError(
-        "Schema features are not a subset of column names: {} vs {}".format(
-            schema_feature_names, column_names))
+    Raises:
+    ------
+      ValueError:
+       * If the schema contains a feature that does not exist in `column_names`.
+       * If the feature type does not map to an arrow type.
+       * If raw_record_column_name exists in column_names
+    """
+    schema_feature_names = [f.name for f in schema.feature]
+    if not set(schema_feature_names).issubset(set(column_names)):
+        raise ValueError(
+            f"Schema features are not a subset of column names: {schema_feature_names} vs {column_names}"
+        )
 
-  fields = []
-  column_name_to_schema_feature_map = {f.name: f for f in schema.feature}
-  for col in column_names:
-    feature = column_name_to_schema_feature_map.get(col)
-    if feature is None:
-      continue
-    arrow_type = _FEATURE_TYPE_TO_ARROW_TYPE.get(
-        _SCHEMA_TYPE_TO_COLUMN_TYPE.get(feature.type), None)
-    if arrow_type is None:
-      raise ValueError("Feature {} has unsupport type {}".format(
-          feature.name, feature.type))
-    fields.append(pa.field(feature.name, arrow_type))
+    fields = []
+    column_name_to_schema_feature_map = {f.name: f for f in schema.feature}
+    for col in column_names:
+        feature = column_name_to_schema_feature_map.get(col)
+        if feature is None:
+            continue
+        arrow_type = _FEATURE_TYPE_TO_ARROW_TYPE.get(
+            _SCHEMA_TYPE_TO_COLUMN_TYPE.get(feature.type), None
+        )
+        if arrow_type is None:
+            raise ValueError(
+                f"Feature {feature.name} has unsupport type {feature.type}"
+            )
+        fields.append(pa.field(feature.name, arrow_type))
 
-  if raw_record_column_name is not None:
-    if raw_record_column_name in column_names:
-      raise ValueError(
-          "raw_record_column_name: {} is already an existing column name. "
-          "Please choose a different name.".format(raw_record_column_name))
-    raw_record_type = _FEATURE_TYPE_TO_ARROW_TYPE.get(
-        ColumnType.STRING)
-    fields.append(pa.field(raw_record_column_name, raw_record_type))
-  return pa.schema(fields)
+    if raw_record_column_name is not None:
+        if raw_record_column_name in column_names:
+            raise ValueError(
+                f"raw_record_column_name: {raw_record_column_name} is already an existing column name. "
+                "Please choose a different name."
+            )
+        raw_record_type = _FEATURE_TYPE_TO_ARROW_TYPE.get(ColumnType.STRING)
+        fields.append(pa.field(raw_record_column_name, raw_record_type))
+    return pa.schema(fields)
 
 
-class _CSVRecordReader(object):
-  """A picklable wrapper for csv.reader that can parse one record at a time."""
+class _CSVRecordReader:
+    """A picklable wrapper for csv.reader that can parse one record at a time."""
 
-  def __init__(self, delimiter: Text):
-    self._delimiter = delimiter
-    self._line_iterator = _MutableRepeat()
-    self._reader = csv.reader(self._line_iterator, delimiter=delimiter)
+    def __init__(self, delimiter: str):
+        self._delimiter = delimiter
+        self._line_iterator = _MutableRepeat()
+        self._reader = csv.reader(self._line_iterator, delimiter=delimiter)
 
-  def ReadLine(self, csv_line: CSVLine) -> List[CSVCell]:
-    """Reads out bytes for PY2 and Unicode for PY3."""
-    self._line_iterator.SetItem(csv_line)
-    return [cell.encode() for cell in next(self._reader)]
+    def ReadLine(self, csv_line: CSVLine) -> List[CSVCell]:
+        """Reads out bytes for PY2 and Unicode for PY3."""
+        self._line_iterator.SetItem(csv_line)
+        return [cell.encode() for cell in next(self._reader)]
 
-  def __getstate__(self):
-    return (self._delimiter,)
+    def __getstate__(self):
+        return (self._delimiter,)
 
-  def __setstate__(self, state):
-    self.__init__(*state)
+    def __setstate__(self, state):
+        self.__init__(*state)
 
 
-class _MutableRepeat(object):
-  """Similar to itertools.repeat, but the item can be set on the fly."""
+class _MutableRepeat:
+    """Similar to itertools.repeat, but the item can be set on the fly."""
 
-  def __init__(self):
-    self._item = None
+    def __init__(self):
+        self._item = None
 
-  def SetItem(self, item: Any):
-    self._item = item
+    def SetItem(self, item: Any):
+        self._item = item
 
-  def __iter__(self) -> Any:
-    return self
+    def __iter__(self) -> Any:
+        return self
 
-  def __next__(self) -> Any:
-    return self._item
+    def __next__(self) -> Any:
+        return self._item
 
-  next = __next__
+    next = __next__
 
 
 _INT64_MIN = np.iinfo(np.int64).min
@@ -489,41 +543,42 @@ def __next__(self) -> Any:
 
 
 def _InferValueType(value: CSVCell) -> ColumnType:
-  """Infer column type from the input value."""
-  if not value:
-    return ColumnType.UNKNOWN
-
-  # Check if the value is of type INT.
-  try:
-    if _INT64_MIN <= int(value) <= _INT64_MAX:
-      return ColumnType.INT
-    # We infer STRING type when we have long integer values.
-    return ColumnType.STRING
-  except ValueError:
-    # If the type is not INT, we next check for FLOAT type (according to our
-    # type hierarchy). If we can convert the string to a float value, we
-    # fix the type to be FLOAT. Else we resort to STRING type.
+    """Infer column type from the input value."""
+    if not value:
+        return ColumnType.UNKNOWN
+
+    # Check if the value is of type INT.
     try:
-      float(value)
+        if _INT64_MIN <= int(value) <= _INT64_MAX:
+            return ColumnType.INT
+        # We infer STRING type when we have long integer values.
+        return ColumnType.STRING
     except ValueError:
-      return ColumnType.STRING
-    return ColumnType.FLOAT
+        # If the type is not INT, we next check for FLOAT type (according to our
+        # type hierarchy). If we can convert the string to a float value, we
+        # fix the type to be FLOAT. Else we resort to STRING type.
+        try:
+            float(value)
+        except ValueError:
+            return ColumnType.STRING
+        return ColumnType.FLOAT
 
 
 def _GetColumnInfosFromSchema(
-    schema: schema_pb2.Schema,
-    column_names: List[Text]) -> List[ColumnInfo]:
-  """Get column name and type from the input schema."""
-  feature_type_map = {}
-  for feature in schema.feature:
-    feature_type = _SCHEMA_TYPE_TO_COLUMN_TYPE.get(feature.type, None)
-    if feature_type is None:
-      raise ValueError("Schema contains invalid type: {}.".format(
-          schema_pb2.FeatureType.Name(feature.type)))
-    feature_type_map[feature.name] = feature_type
-
-  column_infos = []
-  for col_name in column_names:
-    feature_type = feature_type_map.get(col_name, ColumnType.IGNORE)
-    column_infos.append(ColumnInfo(col_name, feature_type))
-  return column_infos
+    schema: schema_pb2.Schema, column_names: List[str]
+) -> List[ColumnInfo]:
+    """Get column name and type from the input schema."""
+    feature_type_map = {}
+    for feature in schema.feature:
+        feature_type = _SCHEMA_TYPE_TO_COLUMN_TYPE.get(feature.type, None)
+        if feature_type is None:
+            raise ValueError(
+                f"Schema contains invalid type: {schema_pb2.FeatureType.Name(feature.type)}."
+            )
+        feature_type_map[feature.name] = feature_type
+
+    column_infos = []
+    for col_name in column_names:
+        feature_type = feature_type_map.get(col_name, ColumnType.IGNORE)
+        column_infos.append(ColumnInfo(col_name, feature_type))
+    return column_infos
diff --git a/tfx_bsl/coders/csv_decoder_test.py b/tfx_bsl/coders/csv_decoder_test.py
index ab49fa01..17b2ef05 100644
--- a/tfx_bsl/coders/csv_decoder_test.py
+++ b/tfx_bsl/coders/csv_decoder_test.py
@@ -1,4 +1,3 @@
-# coding=utf-8
 #
 # Copyright 2018 Google LLC
 #
@@ -16,393 +15,470 @@
 
 """Tests for CSV decoder."""
 
-import pytest
 import apache_beam as beam
-from apache_beam.testing import util as beam_test_util
 import numpy as np
 import pyarrow as pa
-from tfx_bsl.coders import csv_decoder
+import pytest
+from absl.testing import absltest, parameterized
+from apache_beam.testing import util as beam_test_util
 from google.protobuf import text_format
-from absl.testing import absltest
-from absl.testing import parameterized
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.coders import csv_decoder
+
 _TEST_CASES = [
     dict(
-        testcase_name='simple',
-        input_lines=['1,2.0,hello', '5,12.34,world'],
-        column_names=['int_feature', 'float_feature', 'str_feature'],
+        testcase_name="simple",
+        input_lines=["1,2.0,hello", "5,12.34,world"],
+        column_names=["int_feature", "float_feature", "str_feature"],
         expected_csv_cells=[
-            [b'1', b'2.0', b'hello'],
-            [b'5', b'12.34', b'world'],
+            [b"1", b"2.0", b"hello"],
+            [b"5", b"12.34", b"world"],
         ],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.FLOAT,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1], [5]], pa.large_list(pa.int64())),
-            pa.array([[2.0], [12.34]], pa.large_list(pa.float32())),
-            pa.array([[b'hello'], [b'world']], pa.large_list(pa.large_binary()))
-        ], ['int_feature', 'float_feature', 'str_feature'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], [5]], pa.large_list(pa.int64())),
+                pa.array([[2.0], [12.34]], pa.large_list(pa.float32())),
+                pa.array([[b"hello"], [b"world"]], pa.large_list(pa.large_binary())),
+            ],
+            ["int_feature", "float_feature", "str_feature"],
+        ),
+    ),
     dict(
-        testcase_name='missing_values',
-        input_lines=[',,', '1,,hello', ',12.34,'],
-        column_names=['f1', 'f2', 'f3'],
+        testcase_name="missing_values",
+        input_lines=[",,", "1,,hello", ",12.34,"],
+        column_names=["f1", "f2", "f3"],
         expected_csv_cells=[
-            [b'', b'', b''],
-            [b'1', b'', b'hello'],
-            [b'', b'12.34', b''],
+            [b"", b"", b""],
+            [b"1", b"", b"hello"],
+            [b"", b"12.34", b""],
         ],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.FLOAT,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([None, [1], None], pa.large_list(pa.int64())),
-            pa.array([None, None, [12.34]], pa.large_list(pa.float32())),
-            pa.array([None, [b'hello'], None], pa.large_list(
-                pa.large_binary())),
-        ], ['f1', 'f2', 'f3'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None, [1], None], pa.large_list(pa.int64())),
+                pa.array([None, None, [12.34]], pa.large_list(pa.float32())),
+                pa.array([None, [b"hello"], None], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2", "f3"],
+        ),
+    ),
     dict(
-        testcase_name='mixed_int_and_float',
-        input_lines=['2,1.5', '1.5,2'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'2', b'1.5'], [b'1.5', b'2']],
+        testcase_name="mixed_int_and_float",
+        input_lines=["2,1.5", "1.5,2"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"2", b"1.5"], [b"1.5", b"2"]],
         expected_types=[
             csv_decoder.ColumnType.FLOAT,
             csv_decoder.ColumnType.FLOAT,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[2], [1.5]], pa.large_list(pa.float32())),
-            pa.array([[1.5], [2]], pa.large_list(pa.float32()))
-        ], ['f1', 'f2'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[2], [1.5]], pa.large_list(pa.float32())),
+                pa.array([[1.5], [2]], pa.large_list(pa.float32())),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
-        testcase_name='mixed_int_and_string',
-        input_lines=['2,abc', 'abc,2'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'2', b'abc'], [b'abc', b'2']],
+        testcase_name="mixed_int_and_string",
+        input_lines=["2,abc", "abc,2"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"2", b"abc"], [b"abc", b"2"]],
         expected_types=[
             csv_decoder.ColumnType.STRING,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[b'2'], [b'abc']], pa.large_list(pa.large_binary())),
-            pa.array([[b'abc'], [b'2']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[b"2"], [b"abc"]], pa.large_list(pa.large_binary())),
+                pa.array([[b"abc"], [b"2"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
-        testcase_name='mixed_float_and_string',
-        input_lines=['2.3,abc', 'abc,2.3'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'2.3', b'abc'], [b'abc', b'2.3']],
+        testcase_name="mixed_float_and_string",
+        input_lines=["2.3,abc", "abc,2.3"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"2.3", b"abc"], [b"abc", b"2.3"]],
         expected_types=[
             csv_decoder.ColumnType.STRING,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[b'2.3'], [b'abc']], pa.large_list(pa.large_binary())),
-            pa.array([[b'abc'], [b'2.3']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[b"2.3"], [b"abc"]], pa.large_list(pa.large_binary())),
+                pa.array([[b"abc"], [b"2.3"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
-        testcase_name='unicode',
-        input_lines=[u'\U0001f951'],
-        column_names=['f1'],
-        expected_csv_cells=[[u'\U0001f951'.encode('utf-8')]],
+        testcase_name="unicode",
+        input_lines=["\U0001f951"],
+        column_names=["f1"],
+        expected_csv_cells=[["\U0001f951".encode()]],
         expected_types=[
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[u'\U0001f951'.encode('utf-8')]],
-                     pa.large_list(pa.large_binary()))
-        ], ['f1'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [pa.array([["\U0001f951".encode()]], pa.large_list(pa.large_binary()))],
+            ["f1"],
+        ),
+    ),
     dict(
-        testcase_name='quotes',
+        testcase_name="quotes",
         input_lines=['1,"ab,cd,ef"', '5,"wx,xy,yz"'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'1', b'ab,cd,ef'], [b'5', b'wx,xy,yz']],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"1", b"ab,cd,ef"], [b"5", b"wx,xy,yz"]],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1], [5]], pa.large_list(pa.int64())),
-            pa.array([[b'ab,cd,ef'], [b'wx,xy,yz']],
-                     pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], [5]], pa.large_list(pa.int64())),
+                pa.array(
+                    [[b"ab,cd,ef"], [b"wx,xy,yz"]], pa.large_list(pa.large_binary())
+                ),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
-        testcase_name='space_delimiter',
+        testcase_name="space_delimiter",
         input_lines=['1 "ab,cd,ef"', '5 "wx,xy,yz"'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'1', b'ab,cd,ef'], [b'5', b'wx,xy,yz']],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"1", b"ab,cd,ef"], [b"5", b"wx,xy,yz"]],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1], [5]], pa.large_list(pa.int64())),
-            pa.array([[b'ab,cd,ef'], [b'wx,xy,yz']],
-                     pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        delimiter=' '),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], [5]], pa.large_list(pa.int64())),
+                pa.array(
+                    [[b"ab,cd,ef"], [b"wx,xy,yz"]], pa.large_list(pa.large_binary())
+                ),
+            ],
+            ["f1", "f2"],
+        ),
+        delimiter=" ",
+    ),
     dict(
-        testcase_name='tab_delimiter',
-        input_lines=['1\t"this is a \ttext"', '5\t'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'1', b'this is a \ttext'], [b'5', b'']],
+        testcase_name="tab_delimiter",
+        input_lines=['1\t"this is a \ttext"', "5\t"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"1", b"this is a \ttext"], [b"5", b""]],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1], [5]], pa.large_list(pa.int64())),
-            pa.array([[b'this is a \ttext'], None],
-                     pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        delimiter='\t'),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], [5]], pa.large_list(pa.int64())),
+                pa.array(
+                    [[b"this is a \ttext"], None], pa.large_list(pa.large_binary())
+                ),
+            ],
+            ["f1", "f2"],
+        ),
+        delimiter="\t",
+    ),
     dict(
-        testcase_name='negative_values',
-        input_lines=['-1,-2.5'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'-1', b'-2.5']],
+        testcase_name="negative_values",
+        input_lines=["-1,-2.5"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"-1", b"-2.5"]],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.FLOAT,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[-1]], pa.large_list(pa.int64())),
-            pa.array([[-2.5]], pa.large_list(pa.float32()))
-        ], ['f1', 'f2'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[-1]], pa.large_list(pa.int64())),
+                pa.array([[-2.5]], pa.large_list(pa.float32())),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
-        testcase_name='int64_boundary',
+        testcase_name="int64_boundary",
         input_lines=[
-            '%s,%s,%s,%s' % (
+            "%s,%s,%s,%s"
+            % (
                 str(np.iinfo(np.int64).min),
                 str(np.iinfo(np.int64).max),
                 str(np.iinfo(np.int64).min - 1),
                 str(np.iinfo(np.int64).max + 1),
             )
         ],
-        column_names=['int64min', 'int64max', 'int64min-1', 'int64max+1'],
-        expected_csv_cells=[[
-            b'-9223372036854775808', b'9223372036854775807',
-            b'-9223372036854775809', b'9223372036854775808'
-        ]],
+        column_names=["int64min", "int64max", "int64min-1", "int64max+1"],
+        expected_csv_cells=[
+            [
+                b"-9223372036854775808",
+                b"9223372036854775807",
+                b"-9223372036854775809",
+                b"9223372036854775808",
+            ]
+        ],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.STRING,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[-9223372036854775808]], pa.large_list(pa.int64())),
-            pa.array([[9223372036854775807]], pa.large_list(pa.int64())),
-            pa.array([[b'-9223372036854775809']],
-                     pa.large_list(pa.large_binary())),
-            pa.array([[b'9223372036854775808']], pa.large_list(
-                pa.large_binary()))
-        ], ['int64min', 'int64max', 'int64min-1', 'int64max+1'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[-9223372036854775808]], pa.large_list(pa.int64())),
+                pa.array([[9223372036854775807]], pa.large_list(pa.int64())),
+                pa.array([[b"-9223372036854775809"]], pa.large_list(pa.large_binary())),
+                pa.array([[b"9223372036854775808"]], pa.large_list(pa.large_binary())),
+            ],
+            ["int64min", "int64max", "int64min-1", "int64max+1"],
+        ),
+    ),
     dict(
-        testcase_name='skip_blank_lines',
-        input_lines=['', '1,2'],
+        testcase_name="skip_blank_lines",
+        input_lines=["", "1,2"],
         skip_blank_lines=True,
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[], [b'1', b'2']],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[], [b"1", b"2"]],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.INT,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1]], pa.large_list(pa.int64())),
-            pa.array([[2]], pa.large_list(pa.int64()))
-        ], ['f1', 'f2'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1]], pa.large_list(pa.int64())),
+                pa.array([[2]], pa.large_list(pa.int64())),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
-        testcase_name='consider_blank_lines',
-        input_lines=['', '1,2'],
+        testcase_name="consider_blank_lines",
+        input_lines=["", "1,2"],
         skip_blank_lines=False,
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[], [b'1', b'2']],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[], [b"1", b"2"]],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.INT,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([None, [1]], pa.large_list(pa.int64())),
-            pa.array([None, [2]], pa.large_list(pa.int64()))
-        ], ['f1', 'f2'])),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None, [1]], pa.large_list(pa.int64())),
+                pa.array([None, [2]], pa.large_list(pa.int64())),
+            ],
+            ["f1", "f2"],
+        ),
+    ),
     dict(
-        testcase_name='skip_blank_lines_single_column',
-        input_lines=['', '1'],
+        testcase_name="skip_blank_lines_single_column",
+        input_lines=["", "1"],
         skip_blank_lines=True,
-        column_names=['f1'],
-        expected_csv_cells=[[], [b'1']],
+        column_names=["f1"],
+        expected_csv_cells=[[], [b"1"]],
         expected_types=[
             csv_decoder.ColumnType.INT,
         ],
         expected_record_batch=pa.RecordBatch.from_arrays(
-            [pa.array([[1]], pa.large_list(pa.int64()))], ['f1'])),
+            [pa.array([[1]], pa.large_list(pa.int64()))], ["f1"]
+        ),
+    ),
     dict(
-        testcase_name='consider_blank_lines_single_column',
-        input_lines=['', '1'],
+        testcase_name="consider_blank_lines_single_column",
+        input_lines=["", "1"],
         skip_blank_lines=False,
-        column_names=['f1'],
-        expected_csv_cells=[[], [b'1']],
+        column_names=["f1"],
+        expected_csv_cells=[[], [b"1"]],
         expected_types=[
             csv_decoder.ColumnType.INT,
         ],
         expected_record_batch=pa.RecordBatch.from_arrays(
-            [pa.array([None, [1]], pa.large_list(pa.int64()))], ['f1'])),
+            [pa.array([None, [1]], pa.large_list(pa.int64()))], ["f1"]
+        ),
+    ),
     dict(
-        testcase_name='empty_csv',
+        testcase_name="empty_csv",
         input_lines=[],
-        column_names=['f1'],
+        column_names=["f1"],
         expected_csv_cells=[],
         expected_types=[csv_decoder.ColumnType.UNKNOWN],
         expected_record_batch=[],
     ),
     dict(
-        testcase_name='null_column',
-        input_lines=['', ''],
-        column_names=['f1'],
+        testcase_name="null_column",
+        input_lines=["", ""],
+        column_names=["f1"],
         expected_csv_cells=[[], []],
         expected_types=[csv_decoder.ColumnType.UNKNOWN],
         expected_record_batch=pa.RecordBatch.from_arrays(
-            [pa.array([None, None], pa.null())], ['f1'])),
+            [pa.array([None, None], pa.null())], ["f1"]
+        ),
+    ),
     dict(
-        testcase_name='size_2_vector_int_multivalent',
-        input_lines=['12|14'],
-        column_names=['x'],
-        expected_csv_cells=[[b'12|14']],
+        testcase_name="size_2_vector_int_multivalent",
+        input_lines=["12|14"],
+        column_names=["x"],
+        expected_csv_cells=[[b"12|14"]],
         expected_types=[csv_decoder.ColumnType.INT],
         expected_record_batch=pa.RecordBatch.from_arrays(
-            [pa.array([[12, 14]], pa.large_list(pa.int64()))], ['x']),
-        delimiter=' ',
-        multivalent_columns=['x'],
-        secondary_delimiter='|'),
+            [pa.array([[12, 14]], pa.large_list(pa.int64()))], ["x"]
+        ),
+        delimiter=" ",
+        multivalent_columns=["x"],
+        secondary_delimiter="|",
+    ),
     dict(
-        testcase_name='space_and_comma_delimiter',
+        testcase_name="space_and_comma_delimiter",
         input_lines=['1,2 "abcdef"', '5,1 "wxxyyz"'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'1,2', b'abcdef'], [b'5,1', b'wxxyyz']],
-        expected_types=[
-            csv_decoder.ColumnType.INT, csv_decoder.ColumnType.STRING
-        ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1, 2], [5, 1]], pa.large_list(pa.int64())),
-            pa.array([[b'abcdef'], [b'wxxyyz']], pa.large_list(
-                pa.large_binary()))
-        ], ['f1', 'f2']),
-        delimiter=' ',
-        multivalent_columns=['f1'],
-        secondary_delimiter=','),
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"1,2", b"abcdef"], [b"5,1", b"wxxyyz"]],
+        expected_types=[csv_decoder.ColumnType.INT, csv_decoder.ColumnType.STRING],
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1, 2], [5, 1]], pa.large_list(pa.int64())),
+                pa.array([[b"abcdef"], [b"wxxyyz"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+        delimiter=" ",
+        multivalent_columns=["f1"],
+        secondary_delimiter=",",
+    ),
     dict(
-        testcase_name='empty_multivalent_column',
-        input_lines=[',test'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'', b'test']],
-        expected_types=[
-            csv_decoder.ColumnType.UNKNOWN, csv_decoder.ColumnType.STRING
-        ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([None], pa.null()),
-            pa.array([[b'test']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        multivalent_columns=['f1'],
-        secondary_delimiter='|'),
+        testcase_name="empty_multivalent_column",
+        input_lines=[",test"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"", b"test"]],
+        expected_types=[csv_decoder.ColumnType.UNKNOWN, csv_decoder.ColumnType.STRING],
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None], pa.null()),
+                pa.array([[b"test"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+        multivalent_columns=["f1"],
+        secondary_delimiter="|",
+    ),
     dict(
-        testcase_name='empty_values_multivalent_column',
-        input_lines=['|,test'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'|', b'test']],
-        expected_types=[
-            csv_decoder.ColumnType.UNKNOWN, csv_decoder.ColumnType.STRING
-        ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([None], pa.null()),
-            pa.array([[b'test']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        multivalent_columns=['f1'],
-        secondary_delimiter='|'),
+        testcase_name="empty_values_multivalent_column",
+        input_lines=["|,test"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"|", b"test"]],
+        expected_types=[csv_decoder.ColumnType.UNKNOWN, csv_decoder.ColumnType.STRING],
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None], pa.null()),
+                pa.array([[b"test"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+        multivalent_columns=["f1"],
+        secondary_delimiter="|",
+    ),
     dict(
-        testcase_name='empty_string_multivalent_column',
-        input_lines=['|,test', 'a|b,test'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'|', b'test'], [b'a|b', b'test']],
-        expected_types=[
-            csv_decoder.ColumnType.STRING, csv_decoder.ColumnType.STRING
-        ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[b'', b''], [b'a', b'b']], pa.large_list(
-                pa.large_binary())),
-            pa.array([[b'test'], [b'test']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        multivalent_columns=['f1'],
-        secondary_delimiter='|'),
+        testcase_name="empty_string_multivalent_column",
+        input_lines=["|,test", "a|b,test"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"|", b"test"], [b"a|b", b"test"]],
+        expected_types=[csv_decoder.ColumnType.STRING, csv_decoder.ColumnType.STRING],
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[b"", b""], [b"a", b"b"]], pa.large_list(pa.large_binary())),
+                pa.array([[b"test"], [b"test"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+        multivalent_columns=["f1"],
+        secondary_delimiter="|",
+    ),
     dict(
-        testcase_name='int_and_float_multivalent_column',
-        input_lines=['1|2.3,test'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'1|2.3', b'test']],
-        expected_types=[
-            csv_decoder.ColumnType.FLOAT, csv_decoder.ColumnType.STRING
-        ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1, 2.3]], pa.large_list(pa.float32())),
-            pa.array([[b'test']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        multivalent_columns=['f1'],
-        secondary_delimiter='|'),
+        testcase_name="int_and_float_multivalent_column",
+        input_lines=["1|2.3,test"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"1|2.3", b"test"]],
+        expected_types=[csv_decoder.ColumnType.FLOAT, csv_decoder.ColumnType.STRING],
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1, 2.3]], pa.large_list(pa.float32())),
+                pa.array([[b"test"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+        multivalent_columns=["f1"],
+        secondary_delimiter="|",
+    ),
     dict(
-        testcase_name='float_and_string_multivalent_column',
-        input_lines=['2.3|abc,test'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'2.3|abc', b'test']],
-        expected_types=[
-            csv_decoder.ColumnType.STRING, csv_decoder.ColumnType.STRING
-        ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[b'2.3', b'abc']], pa.large_list(pa.large_binary())),
-            pa.array([[b'test']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        multivalent_columns=['f1'],
-        secondary_delimiter='|'),
+        testcase_name="float_and_string_multivalent_column",
+        input_lines=["2.3|abc,test"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"2.3|abc", b"test"]],
+        expected_types=[csv_decoder.ColumnType.STRING, csv_decoder.ColumnType.STRING],
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[b"2.3", b"abc"]], pa.large_list(pa.large_binary())),
+                pa.array([[b"test"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+        multivalent_columns=["f1"],
+        secondary_delimiter="|",
+    ),
     dict(
-        testcase_name='int_and_string_multivalent_column',
-        input_lines=['1|abc,test'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'1|abc', b'test']],
-        expected_types=[
-            csv_decoder.ColumnType.STRING, csv_decoder.ColumnType.STRING
-        ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[b'1', b'abc']], pa.large_list(pa.large_binary())),
-            pa.array([[b'test']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        multivalent_columns=['f1'],
-        secondary_delimiter='|'),
+        testcase_name="int_and_string_multivalent_column",
+        input_lines=["1|abc,test"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"1|abc", b"test"]],
+        expected_types=[csv_decoder.ColumnType.STRING, csv_decoder.ColumnType.STRING],
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[b"1", b"abc"]], pa.large_list(pa.large_binary())),
+                pa.array([[b"test"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+        multivalent_columns=["f1"],
+        secondary_delimiter="|",
+    ),
     dict(
-        testcase_name='int_and_string_multivalent_column_multiple_lines',
-        input_lines=['1|abc,test', '2|2,test'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'1|abc', b'test'], [b'2|2', b'test']],
-        expected_types=[
-            csv_decoder.ColumnType.STRING, csv_decoder.ColumnType.STRING
-        ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[b'1', b'abc'], [b'2', b'2']],
-                     pa.large_list(pa.large_binary())),
-            pa.array([[b'test'], [b'test']], pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2']),
-        multivalent_columns=['f1'],
-        secondary_delimiter='|'),
+        testcase_name="int_and_string_multivalent_column_multiple_lines",
+        input_lines=["1|abc,test", "2|2,test"],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"1|abc", b"test"], [b"2|2", b"test"]],
+        expected_types=[csv_decoder.ColumnType.STRING, csv_decoder.ColumnType.STRING],
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[b"1", b"abc"], [b"2", b"2"]], pa.large_list(pa.large_binary())
+                ),
+                pa.array([[b"test"], [b"test"]], pa.large_list(pa.large_binary())),
+            ],
+            ["f1", "f2"],
+        ),
+        multivalent_columns=["f1"],
+        secondary_delimiter="|",
+    ),
     dict(
-        testcase_name='with_schema',
-        input_lines=['1,2.0,hello', '5,12.34,world'],
-        column_names=['int_feature', 'float_feature', 'str_feature'],
+        testcase_name="with_schema",
+        input_lines=["1,2.0,hello", "5,12.34,world"],
+        column_names=["int_feature", "float_feature", "str_feature"],
         expected_csv_cells=[
-            [b'1', b'2.0', b'hello'],
-            [b'5', b'12.34', b'world'],
+            [b"1", b"2.0", b"hello"],
+            [b"5", b"12.34", b"world"],
         ],
         expected_types=[
             csv_decoder.ColumnType.INT,
@@ -422,42 +498,52 @@
                 name: "str_feature"
                 type: BYTES
               }
-              """, schema_pb2.Schema()),
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1], [5]], pa.large_list(pa.int64())),
-            pa.array([[2.0], [12.34]], pa.large_list(pa.float32())),
-            pa.array([[b'hello'], [b'world']],
-                     pa.large_list(pa.large_binary()))
-        ], ['int_feature', 'float_feature', 'str_feature'])),
+              """,
+            schema_pb2.Schema(),
+        ),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], [5]], pa.large_list(pa.int64())),
+                pa.array([[2.0], [12.34]], pa.large_list(pa.float32())),
+                pa.array([[b"hello"], [b"world"]], pa.large_list(pa.large_binary())),
+            ],
+            ["int_feature", "float_feature", "str_feature"],
+        ),
+    ),
     dict(
-        testcase_name='attach_raw_records',
-        input_lines=['1,2.0,hello', '5,12.34,world'],
-        column_names=['int_feature', 'float_feature', 'str_feature'],
+        testcase_name="attach_raw_records",
+        input_lines=["1,2.0,hello", "5,12.34,world"],
+        column_names=["int_feature", "float_feature", "str_feature"],
         expected_csv_cells=[
-            [b'1', b'2.0', b'hello'],
-            [b'5', b'12.34', b'world'],
+            [b"1", b"2.0", b"hello"],
+            [b"5", b"12.34", b"world"],
         ],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.FLOAT,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1], [5]], pa.large_list(pa.int64())),
-            pa.array([[2.0], [12.34]], pa.large_list(pa.float32())),
-            pa.array([[b'hello'], [b'world']], pa.large_list(
-                pa.large_binary())),
-            pa.array([[b'1,2.0,hello'], [b'5,12.34,world']],
-                     pa.large_list(pa.large_binary()))
-        ], ['int_feature', 'float_feature', 'str_feature', 'raw_records']),
-        raw_record_column_name='raw_records'),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], [5]], pa.large_list(pa.int64())),
+                pa.array([[2.0], [12.34]], pa.large_list(pa.float32())),
+                pa.array([[b"hello"], [b"world"]], pa.large_list(pa.large_binary())),
+                pa.array(
+                    [[b"1,2.0,hello"], [b"5,12.34,world"]],
+                    pa.large_list(pa.large_binary()),
+                ),
+            ],
+            ["int_feature", "float_feature", "str_feature", "raw_records"],
+        ),
+        raw_record_column_name="raw_records",
+    ),
     dict(
-        testcase_name='with_schema_attach_raw_record',
-        input_lines=['1,2.0,hello', '5,12.34,world'],
-        column_names=['int_feature', 'float_feature', 'str_feature'],
+        testcase_name="with_schema_attach_raw_record",
+        input_lines=["1,2.0,hello", "5,12.34,world"],
+        column_names=["int_feature", "float_feature", "str_feature"],
         expected_csv_cells=[
-            [b'1', b'2.0', b'hello'],
-            [b'5', b'12.34', b'world'],
+            [b"1", b"2.0", b"hello"],
+            [b"5", b"12.34", b"world"],
         ],
         expected_types=[
             csv_decoder.ColumnType.INT,
@@ -477,236 +563,293 @@
                 name: "str_feature"
                 type: BYTES
               }
-              """, schema_pb2.Schema()),
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1], [5]], pa.large_list(pa.int64())),
-            pa.array([[2.0], [12.34]], pa.large_list(pa.float32())),
-            pa.array([[b'hello'], [b'world']],
-                     pa.large_list(pa.large_binary())),
-            pa.array([[b'1,2.0,hello'], [b'5,12.34,world']],
-                     pa.large_list(pa.large_binary()))
-        ], ['int_feature', 'float_feature', 'str_feature', 'raw_records'
-           ]),
-        raw_record_column_name='raw_records'),
+              """,
+            schema_pb2.Schema(),
+        ),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], [5]], pa.large_list(pa.int64())),
+                pa.array([[2.0], [12.34]], pa.large_list(pa.float32())),
+                pa.array([[b"hello"], [b"world"]], pa.large_list(pa.large_binary())),
+                pa.array(
+                    [[b"1,2.0,hello"], [b"5,12.34,world"]],
+                    pa.large_list(pa.large_binary()),
+                ),
+            ],
+            ["int_feature", "float_feature", "str_feature", "raw_records"],
+        ),
+        raw_record_column_name="raw_records",
+    ),
     dict(
-        testcase_name='multivalent_attach_raw_records',
+        testcase_name="multivalent_attach_raw_records",
         input_lines=['1,2 "abcdef"', '5,1 "wxxyyz"'],
-        column_names=['f1', 'f2'],
-        expected_csv_cells=[[b'1,2', b'abcdef'], [b'5,1', b'wxxyyz']],
+        column_names=["f1", "f2"],
+        expected_csv_cells=[[b"1,2", b"abcdef"], [b"5,1", b"wxxyyz"]],
         expected_types=[
             csv_decoder.ColumnType.INT,
             csv_decoder.ColumnType.STRING,
         ],
-        expected_record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[1, 2], [5, 1]], pa.large_list(pa.int64())),
-            pa.array([[b'abcdef'], [b'wxxyyz']], pa.large_list(
-                pa.large_binary())),
-            pa.array([[b'1,2 "abcdef"'], [b'5,1 "wxxyyz"']],
-                     pa.large_list(pa.large_binary()))
-        ], ['f1', 'f2', 'raw_records']),
-        delimiter=' ',
-        multivalent_columns=['f1'],
-        secondary_delimiter=',',
-        raw_record_column_name='raw_records'),
+        expected_record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1, 2], [5, 1]], pa.large_list(pa.int64())),
+                pa.array([[b"abcdef"], [b"wxxyyz"]], pa.large_list(pa.large_binary())),
+                pa.array(
+                    [[b'1,2 "abcdef"'], [b'5,1 "wxxyyz"']],
+                    pa.large_list(pa.large_binary()),
+                ),
+            ],
+            ["f1", "f2", "raw_records"],
+        ),
+        delimiter=" ",
+        multivalent_columns=["f1"],
+        secondary_delimiter=",",
+        raw_record_column_name="raw_records",
+    ),
 ]
 
 
 class CSVDecoderTest(parameterized.TestCase):
-  """Tests for CSV decoder."""
-
-  @parameterized.named_parameters(_TEST_CASES)
-  def test_parse_csv_lines(self,
-                           input_lines,
-                           column_names,
-                           expected_csv_cells,
-                           expected_types,
-                           expected_record_batch,
-                           skip_blank_lines=False,
-                           schema=None,
-                           delimiter=',',
-                           multivalent_columns=None,
-                           secondary_delimiter=None,
-                           raw_record_column_name=None):
+    """Tests for CSV decoder."""
 
-    if self._testMethodName in [
-        "test_parse_csv_lines_attach_raw_records",
-        "test_parse_csv_lines_consider_blank_lines",
-        "test_parse_csv_lines_consider_blank_lines_single_column",
-        "test_parse_csv_lines_empty_csv",
-        "test_parse_csv_lines_empty_multivalent_column",
-        "test_parse_csv_lines_empty_string_multivalent_column",
-        "test_parse_csv_lines_empty_values_multivalent_column",
-        "test_parse_csv_lines_float_and_string_multivalent_column",
-        "test_parse_csv_lines_int64_boundary",
-        "test_parse_csv_lines_int_and_float_multivalent_column",
-        "test_parse_csv_lines_int_and_string_multivalent_column",
-        "test_parse_csv_lines_int_and_string_multivalent_column_multiple_lines",
-        "test_parse_csv_lines_missing_values",
-        "test_parse_csv_lines_mixed_float_and_string",
-        "test_parse_csv_lines_mixed_int_and_float",
-        "test_parse_csv_lines_mixed_int_and_string",
-        "test_parse_csv_lines_multivalent_attach_raw_records",
-        "test_parse_csv_lines_negative_values",
-        "test_parse_csv_lines_null_column",
-        "test_parse_csv_lines_quotes",
-        "test_parse_csv_lines_simple",
-        "test_parse_csv_lines_size_2_vector_int_multivalent",
-        "test_parse_csv_lines_skip_blank_lines",
-        "test_parse_csv_lines_skip_blank_lines_single_column",
-        "test_parse_csv_lines_space_and_comma_delimiter",
-        "test_parse_csv_lines_space_delimiter",
-        "test_parse_csv_lines_tab_delimiter",
-        "test_parse_csv_lines_unicode",
-        "test_parse_csv_lines_with_schema",
-        "test_parse_csv_lines_with_schema_attach_raw_record",
-    ]:
-      pytest.xfail(reason="Test fails and needs to be fixed. ")
+    @parameterized.named_parameters(_TEST_CASES)
+    def test_parse_csv_lines(
+        self,
+        input_lines,
+        column_names,
+        expected_csv_cells,
+        expected_types,
+        expected_record_batch,
+        skip_blank_lines=False,
+        schema=None,
+        delimiter=",",
+        multivalent_columns=None,
+        secondary_delimiter=None,
+        raw_record_column_name=None,
+    ):
+        if self._testMethodName in [
+            "test_parse_csv_lines_attach_raw_records",
+            "test_parse_csv_lines_consider_blank_lines",
+            "test_parse_csv_lines_consider_blank_lines_single_column",
+            "test_parse_csv_lines_empty_csv",
+            "test_parse_csv_lines_empty_multivalent_column",
+            "test_parse_csv_lines_empty_string_multivalent_column",
+            "test_parse_csv_lines_empty_values_multivalent_column",
+            "test_parse_csv_lines_float_and_string_multivalent_column",
+            "test_parse_csv_lines_int64_boundary",
+            "test_parse_csv_lines_int_and_float_multivalent_column",
+            "test_parse_csv_lines_int_and_string_multivalent_column",
+            "test_parse_csv_lines_int_and_string_multivalent_column_multiple_lines",
+            "test_parse_csv_lines_missing_values",
+            "test_parse_csv_lines_mixed_float_and_string",
+            "test_parse_csv_lines_mixed_int_and_float",
+            "test_parse_csv_lines_mixed_int_and_string",
+            "test_parse_csv_lines_multivalent_attach_raw_records",
+            "test_parse_csv_lines_negative_values",
+            "test_parse_csv_lines_null_column",
+            "test_parse_csv_lines_quotes",
+            "test_parse_csv_lines_simple",
+            "test_parse_csv_lines_size_2_vector_int_multivalent",
+            "test_parse_csv_lines_skip_blank_lines",
+            "test_parse_csv_lines_skip_blank_lines_single_column",
+            "test_parse_csv_lines_space_and_comma_delimiter",
+            "test_parse_csv_lines_space_delimiter",
+            "test_parse_csv_lines_tab_delimiter",
+            "test_parse_csv_lines_unicode",
+            "test_parse_csv_lines_with_schema",
+            "test_parse_csv_lines_with_schema_attach_raw_record",
+        ]:
+            pytest.xfail(reason="Test fails and needs to be fixed. ")
 
-    def _check_csv_cells(actual):
-      for i in range(len(actual)):
-        self.assertEqual(expected_csv_cells[i], actual[i][0])
-        self.assertEqual(input_lines[i], actual[i][1])
+        def _check_csv_cells(actual):
+            for i in range(len(actual)):
+                self.assertEqual(expected_csv_cells[i], actual[i][0])
+                self.assertEqual(input_lines[i], actual[i][1])
 
-    def _check_types(actual):
-      self.assertLen(actual, 1)
-      self.assertCountEqual([
-          csv_decoder.ColumnInfo(n, t)
-          for n, t in zip(column_names, expected_types)
-      ], actual[0])
+        def _check_types(actual):
+            self.assertLen(actual, 1)
+            self.assertCountEqual(
+                [
+                    csv_decoder.ColumnInfo(n, t)
+                    for n, t in zip(column_names, expected_types)
+                ],
+                actual[0],
+            )
 
-    def _check_record_batches(actual):
-      """Compares a list of pa.RecordBatch."""
-      if actual:
-        self.assertTrue(actual[0].equals(expected_record_batch))
-      else:
-        self.assertEqual(expected_record_batch, actual)
+        def _check_record_batches(actual):
+            """Compares a list of pa.RecordBatch."""
+            if actual:
+                self.assertTrue(actual[0].equals(expected_record_batch))
+            else:
+                self.assertEqual(expected_record_batch, actual)
 
-    def _check_arrow_schema(actual):
-      for record_batch in actual:
-        expected_arrow_schema = csv_decoder.GetArrowSchema(
-            column_names, schema, raw_record_column_name)
-        self.assertEqual(record_batch.schema, expected_arrow_schema)
+        def _check_arrow_schema(actual):
+            for record_batch in actual:
+                expected_arrow_schema = csv_decoder.GetArrowSchema(
+                    column_names, schema, raw_record_column_name
+                )
+                self.assertEqual(record_batch.schema, expected_arrow_schema)
 
-    with beam.Pipeline() as p:
-      parsed_csv_cells_and_raw_records = (
-          p | beam.Create(input_lines, reshuffle=False)
-          | beam.ParDo(csv_decoder.ParseCSVLine(delimiter=delimiter)))
-      inferred_types = (
-          parsed_csv_cells_and_raw_records
-          | beam.Keys()
-          | beam.CombineGlobally(
-              csv_decoder.ColumnTypeInferrer(
-                  column_names,
-                  skip_blank_lines=skip_blank_lines,
-                  multivalent_columns=multivalent_columns,
-                  secondary_delimiter=secondary_delimiter)))
+        with beam.Pipeline() as p:
+            parsed_csv_cells_and_raw_records = (
+                p
+                | beam.Create(input_lines, reshuffle=False)
+                | beam.ParDo(csv_decoder.ParseCSVLine(delimiter=delimiter))
+            )
+            inferred_types = (
+                parsed_csv_cells_and_raw_records
+                | beam.Keys()
+                | beam.CombineGlobally(
+                    csv_decoder.ColumnTypeInferrer(
+                        column_names,
+                        skip_blank_lines=skip_blank_lines,
+                        multivalent_columns=multivalent_columns,
+                        secondary_delimiter=secondary_delimiter,
+                    )
+                )
+            )
 
-      beam_test_util.assert_that(
-          parsed_csv_cells_and_raw_records,
-          _check_csv_cells,
-          label='check_parsed_csv_cells')
-      beam_test_util.assert_that(
-          inferred_types, _check_types, label='check_types')
+            beam_test_util.assert_that(
+                parsed_csv_cells_and_raw_records,
+                _check_csv_cells,
+                label="check_parsed_csv_cells",
+            )
+            beam_test_util.assert_that(
+                inferred_types, _check_types, label="check_types"
+            )
 
-      record_batches = (
-          parsed_csv_cells_and_raw_records
-          | beam.BatchElements(min_batch_size=1000) | beam.ParDo(
-              csv_decoder.BatchedCSVRowsToRecordBatch(
-                  skip_blank_lines=skip_blank_lines,
-                  multivalent_columns=multivalent_columns,
-                  secondary_delimiter=secondary_delimiter,
-                  raw_record_column_name=raw_record_column_name),
-              beam.pvalue.AsSingleton(inferred_types)))
-      beam_test_util.assert_that(
-          record_batches, _check_record_batches, label='check_record_batches')
-      if schema:
-        beam_test_util.assert_that(
-            record_batches, _check_arrow_schema, label='check_arrow_schema')
+            record_batches = (
+                parsed_csv_cells_and_raw_records
+                | beam.BatchElements(min_batch_size=1000)
+                | beam.ParDo(
+                    csv_decoder.BatchedCSVRowsToRecordBatch(
+                        skip_blank_lines=skip_blank_lines,
+                        multivalent_columns=multivalent_columns,
+                        secondary_delimiter=secondary_delimiter,
+                        raw_record_column_name=raw_record_column_name,
+                    ),
+                    beam.pvalue.AsSingleton(inferred_types),
+                )
+            )
+            beam_test_util.assert_that(
+                record_batches, _check_record_batches, label="check_record_batches"
+            )
+            if schema:
+                beam_test_util.assert_that(
+                    record_batches, _check_arrow_schema, label="check_arrow_schema"
+                )
 
-    # Testing CSVToRecordBatch
-    with beam.Pipeline() as p:
-      record_batches = (
-          p | 'CreatingPColl' >> beam.Create(input_lines, reshuffle=False)
-          | 'CSVToRecordBatch' >> csv_decoder.CSVToRecordBatch(
-              column_names=column_names,
-              delimiter=delimiter,
-              skip_blank_lines=skip_blank_lines,
-              desired_batch_size=1000,
-              schema=schema,
-              multivalent_columns=multivalent_columns,
-              secondary_delimiter=secondary_delimiter,
-              raw_record_column_name=raw_record_column_name))
-      beam_test_util.assert_that(
-          record_batches, _check_record_batches, label='check_record_batches')
+        # Testing CSVToRecordBatch
+        with beam.Pipeline() as p:
+            record_batches = (
+                p
+                | "CreatingPColl" >> beam.Create(input_lines, reshuffle=False)
+                | "CSVToRecordBatch"
+                >> csv_decoder.CSVToRecordBatch(
+                    column_names=column_names,
+                    delimiter=delimiter,
+                    skip_blank_lines=skip_blank_lines,
+                    desired_batch_size=1000,
+                    schema=schema,
+                    multivalent_columns=multivalent_columns,
+                    secondary_delimiter=secondary_delimiter,
+                    raw_record_column_name=raw_record_column_name,
+                )
+            )
+            beam_test_util.assert_that(
+                record_batches, _check_record_batches, label="check_record_batches"
+            )
 
-  @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-  def test_csv_to_recordbatch_schema_features_subset_of_column_names(self):
-    input_lines = ['1,2.0,hello', '5,12.34,world']
-    column_names = ['int_feature', 'float_feature', 'str_feature']
-    schema = text_format.Parse("""feature { name: "int_feature" type: INT }""",
-                               schema_pb2.Schema())
-    self.assertEqual(
-        csv_decoder.GetArrowSchema(column_names, schema),
-        pa.schema([pa.field('int_feature', pa.large_list(pa.int64()))]))
+    @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
+    def test_csv_to_recordbatch_schema_features_subset_of_column_names(self):
+        input_lines = ["1,2.0,hello", "5,12.34,world"]
+        column_names = ["int_feature", "float_feature", "str_feature"]
+        schema = text_format.Parse(
+            """feature { name: "int_feature" type: INT }""", schema_pb2.Schema()
+        )
+        self.assertEqual(
+            csv_decoder.GetArrowSchema(column_names, schema),
+            pa.schema([pa.field("int_feature", pa.large_list(pa.int64()))]),
+        )
 
-    def _check_record_batches(record_batches):
-      self.assertLen(record_batches, 1)
-      self.assertTrue(record_batches[0].equals(
-          pa.RecordBatch.from_arrays(
-              [pa.array([[1], [5]], pa.large_list(pa.int64()))],
-              ['int_feature'])))
+        def _check_record_batches(record_batches):
+            self.assertLen(record_batches, 1)
+            self.assertTrue(
+                record_batches[0].equals(
+                    pa.RecordBatch.from_arrays(
+                        [pa.array([[1], [5]], pa.large_list(pa.int64()))],
+                        ["int_feature"],
+                    )
+                )
+            )
 
-    with beam.Pipeline() as p:
-      record_batches = (
-          p | 'CreatingPColl' >> beam.Create(input_lines, reshuffle=False)
-          | 'CSVToRecordBatch' >> csv_decoder.CSVToRecordBatch(
-              column_names=column_names,
-              delimiter=',',
-              desired_batch_size=1000,
-              schema=schema))
-      beam_test_util.assert_that(
-          record_batches, _check_record_batches, label='check_record_batches')
+        with beam.Pipeline() as p:
+            record_batches = (
+                p
+                | "CreatingPColl" >> beam.Create(input_lines, reshuffle=False)
+                | "CSVToRecordBatch"
+                >> csv_decoder.CSVToRecordBatch(
+                    column_names=column_names,
+                    delimiter=",",
+                    desired_batch_size=1000,
+                    schema=schema,
+                )
+            )
+            beam_test_util.assert_that(
+                record_batches, _check_record_batches, label="check_record_batches"
+            )
 
-  def test_invalid_row(self):
-    input_lines = ['1,2.0,hello', '5,12.34']
-    column_names = ['int_feature', 'float_feature', 'str_feature']
-    with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
-        ValueError, '.*Columns do not match specified csv headers.*'):
-      with beam.Pipeline() as p:
-        result = (
-            p | beam.Create(input_lines, reshuffle=False)
-            | beam.ParDo(csv_decoder.ParseCSVLine(delimiter=','))
-            | beam.Keys()
-            | beam.CombineGlobally(
-                csv_decoder.ColumnTypeInferrer(
-                    column_names, skip_blank_lines=False)))
-        beam_test_util.assert_that(result, lambda _: None)
+    def test_invalid_row(self):
+        input_lines = ["1,2.0,hello", "5,12.34"]
+        column_names = ["int_feature", "float_feature", "str_feature"]
+        with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
+            ValueError, ".*Columns do not match specified csv headers.*"
+        ):
+            with beam.Pipeline() as p:
+                result = (
+                    p
+                    | beam.Create(input_lines, reshuffle=False)
+                    | beam.ParDo(csv_decoder.ParseCSVLine(delimiter=","))
+                    | beam.Keys()
+                    | beam.CombineGlobally(
+                        csv_decoder.ColumnTypeInferrer(
+                            column_names, skip_blank_lines=False
+                        )
+                    )
+                )
+                beam_test_util.assert_that(result, lambda _: None)
 
-  def test_invalid_schema_type(self):
-    input_lines = ['1']
-    column_names = ['f1']
-    schema = text_format.Parse(
-        """
+    def test_invalid_schema_type(self):
+        input_lines = ["1"]
+        column_names = ["f1"]
+        schema = text_format.Parse(
+            """
               feature {
                 name: "struct_feature"
                 type: STRUCT
               }
-              """, schema_pb2.Schema())
-    with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
-        ValueError, '.*Schema contains invalid type: STRUCT.*'):
-      with beam.Pipeline() as p:
-        result = (
-            p | beam.Create(input_lines, reshuffle=False)
-            | 'CSVToRecordBatch' >> csv_decoder.CSVToRecordBatch(
-                column_names=column_names,
-                schema=schema,
-                desired_batch_size=1000))
-        beam_test_util.assert_that(result, lambda _: None)
+              """,
+            schema_pb2.Schema(),
+        )
+        with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
+            ValueError, ".*Schema contains invalid type: STRUCT.*"
+        ):
+            with beam.Pipeline() as p:
+                result = (
+                    p
+                    | beam.Create(input_lines, reshuffle=False)
+                    | "CSVToRecordBatch"
+                    >> csv_decoder.CSVToRecordBatch(
+                        column_names=column_names,
+                        schema=schema,
+                        desired_batch_size=1000,
+                    )
+                )
+                beam_test_util.assert_that(result, lambda _: None)
 
-  def test_invalid_raw_record_column_name(self):
-    input_lines = ['1,2.0,hello', '5,12.34']
-    schema = text_format.Parse(
-        """
+    def test_invalid_raw_record_column_name(self):
+        input_lines = ["1,2.0,hello", "5,12.34"]
+        schema = text_format.Parse(
+            """
               feature {
                 name: "int_feature"
                 type: INT
@@ -719,26 +862,35 @@ def test_invalid_raw_record_column_name(self):
                 name: "str_feature"
                 type: BYTES
               }
-              """, schema_pb2.Schema())
-    column_names = ['int_feature', 'float_feature', 'str_feature']
-    with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
-        ValueError, 'raw_record_column_name.* is already an existing column.*'):
-      with beam.Pipeline() as p:
-        result = (
-            p | beam.Create(input_lines, reshuffle=False)
-            | 'CSVToRecordBatch' >> csv_decoder.CSVToRecordBatch(
-                column_names=column_names,
-                desired_batch_size=1000,
-                raw_record_column_name='int_feature'))
-        beam_test_util.assert_that(result, lambda _: None)
-    with self.assertRaisesRegex(
-        ValueError, 'raw_record_column_name.* is already an existing column.*'):
-      csv_decoder.GetArrowSchema(
-          column_names, schema, raw_record_column_name='int_feature')
+              """,
+            schema_pb2.Schema(),
+        )
+        column_names = ["int_feature", "float_feature", "str_feature"]
+        with self.assertRaisesRegex(  # pylint: disable=g-error-prone-assert-raises
+            ValueError, "raw_record_column_name.* is already an existing column.*"
+        ):
+            with beam.Pipeline() as p:
+                result = (
+                    p
+                    | beam.Create(input_lines, reshuffle=False)
+                    | "CSVToRecordBatch"
+                    >> csv_decoder.CSVToRecordBatch(
+                        column_names=column_names,
+                        desired_batch_size=1000,
+                        raw_record_column_name="int_feature",
+                    )
+                )
+                beam_test_util.assert_that(result, lambda _: None)
+        with self.assertRaisesRegex(
+            ValueError, "raw_record_column_name.* is already an existing column.*"
+        ):
+            csv_decoder.GetArrowSchema(
+                column_names, schema, raw_record_column_name="int_feature"
+            )
 
-  def test_get_arrow_schema_schema_feature_not_subset_of_column_names(self):
-    schema = text_format.Parse(
-        """
+    def test_get_arrow_schema_schema_feature_not_subset_of_column_names(self):
+        schema = text_format.Parse(
+            """
               feature {
                 name: "f1"
                 type: INT
@@ -747,12 +899,15 @@ def test_get_arrow_schema_schema_feature_not_subset_of_column_names(self):
                 name: "f2"
                 type: INT
               }
-              """, schema_pb2.Schema())
-    column_names = ['f1']
-    with self.assertRaisesRegex(
-        ValueError, 'Schema features are not a subset of column names'):
-      csv_decoder.GetArrowSchema(column_names, schema)
+              """,
+            schema_pb2.Schema(),
+        )
+        column_names = ["f1"]
+        with self.assertRaisesRegex(
+            ValueError, "Schema features are not a subset of column names"
+        ):
+            csv_decoder.GetArrowSchema(column_names, schema)
 
 
-if __name__ == '__main__':
-  absltest.main()
+if __name__ == "__main__":
+    absltest.main()
diff --git a/tfx_bsl/coders/example_coder.py b/tfx_bsl/coders/example_coder.py
index 5e41ad92..41973efa 100644
--- a/tfx_bsl/coders/example_coder.py
+++ b/tfx_bsl/coders/example_coder.py
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Example coders."""
-from typing import List, Optional, Type, Tuple
 
-import pyarrow as pa
+from typing import List, Optional, Tuple, Type
 
+import pyarrow as pa
 from tensorflow_metadata.proto.v0 import schema_pb2
 
 # pylint: disable=unused-import
@@ -23,85 +23,93 @@
 # pylint: disable=g-import-not-at-top
 # See b/148667210 for why the ImportError is ignored.
 try:
-  from tfx_bsl.cc.tfx_bsl_extension.coders import ExamplesToRecordBatchDecoder as ExamplesToRecordBatchDecoderCpp
-  from tfx_bsl.cc.tfx_bsl_extension.coders import ExampleToNumpyDict
-  from tfx_bsl.cc.tfx_bsl_extension.coders import RecordBatchToExamplesEncoder as RecordBatchToExamplesEncoderCpp
+    from tfx_bsl.cc.tfx_bsl_extension.coders import (
+        ExamplesToRecordBatchDecoder as ExamplesToRecordBatchDecoderCpp,
+    )
+    from tfx_bsl.cc.tfx_bsl_extension.coders import ExampleToNumpyDict
+    from tfx_bsl.cc.tfx_bsl_extension.coders import (
+        RecordBatchToExamplesEncoder as RecordBatchToExamplesEncoderCpp,
+    )
 except ImportError:
-  import sys
-  sys.stderr.write("Error importing tfx_bsl_extension.coders. "
-                   "Some tfx_bsl functionalities are not available")
+    import sys
+
+    sys.stderr.write(
+        "Error importing tfx_bsl_extension.coders. "
+        "Some tfx_bsl functionalities are not available"
+    )
 # pylint: enable=g-import-not-at-top
 # pytype: enable=import-error
 # pylint: enable=unused-import
 
 
 class RecordBatchToExamplesEncoder:
-  """Encodes `pa.RecordBatch` as a list of serialized `tf.Example`s.
+    """Encodes `pa.RecordBatch` as a list of serialized `tf.Example`s.
 
-  Requires TFMD schema only if RecordBatches contains nested lists with
-  depth > 2 that represent TensorFlow's RaggedFeatures.
-  """
+    Requires TFMD schema only if RecordBatches contains nested lists with
+    depth > 2 that represent TensorFlow's RaggedFeatures.
+    """
 
-  __slots__ = ["_schema", "_coder"]
+    __slots__ = ["_schema", "_coder"]
 
-  def __init__(self, schema: Optional[schema_pb2.Schema] = None):
-    self._schema = schema
-    self._coder = RecordBatchToExamplesEncoderCpp(
-        None if schema is None else schema.SerializeToString()
-    )
+    def __init__(self, schema: Optional[schema_pb2.Schema] = None):
+        self._schema = schema
+        self._coder = RecordBatchToExamplesEncoderCpp(
+            None if schema is None else schema.SerializeToString()
+        )
 
-  def __reduce__(
-      self,
-  ) -> Tuple[
-      Type["RecordBatchToExamplesEncoder"], Tuple[Optional[schema_pb2.Schema]]
-  ]:
-    return (self.__class__, (self._schema,))
+    def __reduce__(
+        self,
+    ) -> Tuple[
+        Type["RecordBatchToExamplesEncoder"], Tuple[Optional[schema_pb2.Schema]]
+    ]:
+        return (self.__class__, (self._schema,))
 
-  def encode(self, record_batch: pa.RecordBatch) -> List[bytes]:  # pylint: disable=invalid-name
-    return self._coder.Encode(record_batch)
+    def encode(self, record_batch: pa.RecordBatch) -> List[bytes]:  # pylint: disable=invalid-name
+        return self._coder.Encode(record_batch)
 
 
 # TODO(b/271883540) Deprecate this.
 def RecordBatchToExamples(record_batch: pa.RecordBatch) -> List[bytes]:
-  """Stateless version of the encoder above."""
-  return RecordBatchToExamplesEncoder().encode(record_batch)
+    """Stateless version of the encoder above."""
+    return RecordBatchToExamplesEncoder().encode(record_batch)
 
 
 class ExamplesToRecordBatchDecoder:
-  """Decodes a list of serialized `tf.Example`s into `pa.RecordBatch`.
+    """Decodes a list of serialized `tf.Example`s into `pa.RecordBatch`.
 
-  If a schema is provided then the record batch will contain only the fields
-  from the schema, in the same order as the Schema.  The data type of the
-  schema to determine the field types, with INT, BYTES and FLOAT fields in the
-  schema corresponding to the Arrow data types large_list[int64],
-  large_list[large_binary] and large_list[float32].
+    If a schema is provided then the record batch will contain only the fields
+    from the schema, in the same order as the Schema.  The data type of the
+    schema to determine the field types, with INT, BYTES and FLOAT fields in the
+    schema corresponding to the Arrow data types large_list[int64],
+    large_list[large_binary] and large_list[float32].
 
-  If a schema is not provided then the data type will be inferred, and chosen
-  from list_type[int64], list_type[binary_type] and list_type[float32].  In the
-  case where no data type can be inferred the arrow null type will be inferred.
+    If a schema is not provided then the data type will be inferred, and chosen
+    from list_type[int64], list_type[binary_type] and list_type[float32].  In the
+    case where no data type can be inferred the arrow null type will be inferred.
 
-  This class wraps pybind11 class `ExamplesToRecordBatchDecoder` to make the
-  class and its member functions picklable.
-  """
+    This class wraps pybind11 class `ExamplesToRecordBatchDecoder` to make the
+    class and its member functions picklable.
+    """
 
-  __slots__ = ["_schema", "_coder"]
+    __slots__ = ["_schema", "_coder"]
 
-  def __init__(self, serialized_schema: Optional[bytes] = None):
-    """Initializes ExamplesToRecordBatchDecoder.
+    def __init__(self, serialized_schema: Optional[bytes] = None):
+        """Initializes ExamplesToRecordBatchDecoder.
 
-    Args:
-      serialized_schema: A serialized TFMD schema.
-    """
-    self._schema = serialized_schema
-    self._coder = ExamplesToRecordBatchDecoderCpp(serialized_schema)
+        Args:
+        ----
+          serialized_schema: A serialized TFMD schema.
+        """
+        self._schema = serialized_schema
+        self._coder = ExamplesToRecordBatchDecoderCpp(serialized_schema)
 
-  def __reduce__(
-      self
-  ) -> Tuple[Type["ExamplesToRecordBatchDecoder"], Tuple[Optional[bytes]]]:
-    return (self.__class__, (self._schema,))
+    def __reduce__(
+        self,
+    ) -> Tuple[Type["ExamplesToRecordBatchDecoder"], Tuple[Optional[bytes]]]:
+        return (self.__class__, (self._schema,))
 
-  def DecodeBatch(self, examples: List[bytes]) -> pa.RecordBatch:
-    return self._coder.DecodeBatch(examples)
+    def DecodeBatch(self, examples: List[bytes]) -> pa.RecordBatch:
+        return self._coder.DecodeBatch(examples)
 
-  def ArrowSchema(self) -> pa.Schema:
-    return self._coder.ArrowSchema()
+    def ArrowSchema(self) -> pa.Schema:
+        return self._coder.ArrowSchema()
diff --git a/tfx_bsl/coders/example_coder_test.py b/tfx_bsl/coders/example_coder_test.py
index 8c874fd8..c77c2b43 100644
--- a/tfx_bsl/coders/example_coder_test.py
+++ b/tfx_bsl/coders/example_coder_test.py
@@ -12,17 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Tests for tfx_bsl.coders.example_coder."""
+
 import pickle
+
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.coders import example_coder
-from tfx_bsl.tfxio import tensor_representation_util
-
+from absl.testing import absltest, parameterized
 from google.protobuf import text_format
-from absl.testing import absltest
-from absl.testing import parameterized
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.coders import example_coder
+from tfx_bsl.tfxio import tensor_representation_util
+
 _TEST_EXAMPLES = [
     """
    features {
@@ -64,16 +65,22 @@
         testcase_name="without_schema_simple",
         schema_text_proto=None,
         examples_text_proto=_TEST_EXAMPLES,
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None, None, [1.0], None],
-                     type=pa.large_list(pa.float32())),
-            pa.array([None, None, None, None], type=pa.null()),
-            pa.array([[b"a", b"b"], None, None, []],
-                     type=pa.large_list(pa.large_binary())),
-            pa.array([[1.0, 2.0], None, None, []],
-                     type=pa.large_list(pa.float32())),
-            pa.array([[4, 5], None, None, []], type=pa.large_list(pa.int64()))
-        ], ["v", "w", "x", "y", "z"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None, None, [1.0], None], type=pa.large_list(pa.float32())),
+                pa.array([None, None, None, None], type=pa.null()),
+                pa.array(
+                    [[b"a", b"b"], None, None, []],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.array(
+                    [[1.0, 2.0], None, None, []], type=pa.large_list(pa.float32())
+                ),
+                pa.array([[4, 5], None, None, []], type=pa.large_list(pa.int64())),
+            ],
+            ["v", "w", "x", "y", "z"],
+        ),
+    ),
     dict(
         testcase_name="with_schema_simple",
         schema_text_proto="""
@@ -90,13 +97,20 @@
           type: INT
         }""",
         examples_text_proto=_TEST_EXAMPLES,
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([[b"a", b"b"], None, None, []],
-                     type=pa.large_list(pa.large_binary())),
-            pa.array([[1.0, 2.0], None, None, []],
-                     type=pa.large_list(pa.float32())),
-            pa.array([[4, 5], None, None, []], type=pa.large_list(pa.int64()))
-        ], ["x", "y", "z"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[b"a", b"b"], None, None, []],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.array(
+                    [[1.0, 2.0], None, None, []], type=pa.large_list(pa.float32())
+                ),
+                pa.array([[4, 5], None, None, []], type=pa.large_list(pa.int64())),
+            ],
+            ["x", "y", "z"],
+        ),
+    ),
     dict(
         testcase_name="ignore_features_not_in_schema",
         schema_text_proto="""
@@ -110,12 +124,19 @@
         }
         """,
         examples_text_proto=_TEST_EXAMPLES,
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([[b"a", b"b"], None, None, []],
-                     type=pa.large_list(pa.large_binary())),
-            pa.array([[1.0, 2.0], None, None, []],
-                     type=pa.large_list(pa.float32())),
-        ], ["x", "y"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[b"a", b"b"], None, None, []],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.array(
+                    [[1.0, 2.0], None, None, []], type=pa.large_list(pa.float32())
+                ),
+            ],
+            ["x", "y"],
+        ),
+    ),
     dict(
         testcase_name="build_nulls_for_unseen_feature",
         schema_text_proto="""
@@ -125,10 +146,15 @@
         }
         """,
         examples_text_proto=_TEST_EXAMPLES,
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None, None, None, None],
-                     type=pa.large_list(pa.large_binary())),
-        ], ["a"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [None, None, None, None], type=pa.large_list(pa.large_binary())
+                ),
+            ],
+            ["a"],
+        ),
+    ),
     dict(
         testcase_name="build_null_for_unset_kind",
         schema_text_proto="""
@@ -142,9 +168,13 @@
         features { feature { key: "a" value { } } }
         """
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None], type=pa.large_list(pa.large_binary())),
-        ], ["a"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None], type=pa.large_list(pa.large_binary())),
+            ],
+            ["a"],
+        ),
+    ),
     dict(
         testcase_name="duplicate_feature_names_in_schema",
         schema_text_proto="""
@@ -163,9 +193,13 @@
         features { feature { key: "a" value { } } }
         """
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None], type=pa.large_list(pa.large_binary())),
-        ], ["a"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None], type=pa.large_list(pa.large_binary())),
+            ],
+            ["a"],
+        ),
+    ),
 ]
 
 _INVALID_INPUT_CASES = [
@@ -185,7 +219,8 @@
         error=RuntimeError,
         error_msg_regex=(
             "Feature had wrong type, expected bytes_list, found float_list "
-            "for feature \"a\""),
+            'for feature "a"'
+        ),
     ),
     dict(
         testcase_name="no_schema_mixed_type",
@@ -193,94 +228,99 @@
         examples_text_proto=[
             """
         features { feature { key: "a" value { float_list { value: [] } } } }
-        """, """
+        """,
+            """
         features { feature { key: "a" value { int64_list { value: [] } } } }
-        """
+        """,
         ],
         error=RuntimeError,
         error_msg_regex=(
             "Feature had wrong type, expected float_list, found int64_list"
-            " for feature \"a\""),
+            ' for feature "a"'
+        ),
     ),
 ]
 
 
 class ExamplesToRecordBatchDecoderTest(parameterized.TestCase):
-
-  @parameterized.named_parameters(*_DECODE_CASES)
-  def test_decode(self, schema_text_proto, examples_text_proto, expected):
-    serialized_examples = [
-        text_format.Parse(pbtxt, tf.train.Example()).SerializeToString()
-        for pbtxt in examples_text_proto
-    ]
-    serialized_schema = None
-    if schema_text_proto is not None:
-      serialized_schema = text_format.Parse(
-          schema_text_proto, schema_pb2.Schema()).SerializeToString()
-
-    coder = example_coder.ExamplesToRecordBatchDecoder(serialized_schema)
-
-    result = coder.DecodeBatch(serialized_examples)
-    self.assertIsInstance(result, pa.RecordBatch)
-    self.assertTrue(
-        result.equals(expected),
-        (
-            f"\nactual: {result.to_pydict()}\nactual schema:"
-            f" {result.schema}\nexpected:{expected.to_pydict()}\nexpected"
-            f" schema: {expected.schema}\nencoded: {serialized_examples}"
-        ),
-    )
-    if serialized_schema:
-      self.assertTrue(expected.schema.equals(coder.ArrowSchema()))
-
-    # Verify that coder and DecodeBatch can be properly pickled and unpickled.
-    # This is necessary for using them in beam.Map.
-    coder = pickle.loads(pickle.dumps(coder))
-    decode = pickle.loads(pickle.dumps(coder.DecodeBatch))
-    result = decode(serialized_examples)
-    self.assertIsInstance(result, pa.RecordBatch)
-    self.assertTrue(
-        result.equals(expected),
-        "actual: {}\n expected:{}".format(result, expected))
-    if serialized_schema:
-      self.assertTrue(expected.schema.equals(coder.ArrowSchema()))
-
-  @parameterized.named_parameters(*_INVALID_INPUT_CASES)
-  def test_invalid_input(self, schema_text_proto, examples_text_proto, error,
-                         error_msg_regex):
-    serialized_examples = [
-        text_format.Parse(pbtxt, tf.train.Example()).SerializeToString()
-        for pbtxt in examples_text_proto
-    ]
-    serialized_schema = None
-    if schema_text_proto is not None:
-      serialized_schema = text_format.Parse(
-          schema_text_proto, schema_pb2.Schema()).SerializeToString()
-
-    if serialized_schema:
-      coder = example_coder.ExamplesToRecordBatchDecoder(serialized_schema)
-    else:
-      coder = example_coder.ExamplesToRecordBatchDecoder()
-
-    with self.assertRaisesRegex(error, error_msg_regex):
-      coder.DecodeBatch(serialized_examples)
-
-  def test_arrow_schema_not_available_if_tfmd_schema_not_available(self):
-    coder = example_coder.ExamplesToRecordBatchDecoder()
-    with self.assertRaisesRegex(RuntimeError, "Unable to get the arrow schema"):
-      _ = coder.ArrowSchema()
-
-  def test_invalid_feature_type(self):
-    serialized_schema = text_format.Parse(
-        """
+    @parameterized.named_parameters(*_DECODE_CASES)
+    def test_decode(self, schema_text_proto, examples_text_proto, expected):
+        serialized_examples = [
+            text_format.Parse(pbtxt, tf.train.Example()).SerializeToString()
+            for pbtxt in examples_text_proto
+        ]
+        serialized_schema = None
+        if schema_text_proto is not None:
+            serialized_schema = text_format.Parse(
+                schema_text_proto, schema_pb2.Schema()
+            ).SerializeToString()
+
+        coder = example_coder.ExamplesToRecordBatchDecoder(serialized_schema)
+
+        result = coder.DecodeBatch(serialized_examples)
+        self.assertIsInstance(result, pa.RecordBatch)
+        self.assertTrue(
+            result.equals(expected),
+            (
+                f"\nactual: {result.to_pydict()}\nactual schema:"
+                f" {result.schema}\nexpected:{expected.to_pydict()}\nexpected"
+                f" schema: {expected.schema}\nencoded: {serialized_examples}"
+            ),
+        )
+        if serialized_schema:
+            self.assertTrue(expected.schema.equals(coder.ArrowSchema()))
+
+        # Verify that coder and DecodeBatch can be properly pickled and unpickled.
+        # This is necessary for using them in beam.Map.
+        coder = pickle.loads(pickle.dumps(coder))
+        decode = pickle.loads(pickle.dumps(coder.DecodeBatch))
+        result = decode(serialized_examples)
+        self.assertIsInstance(result, pa.RecordBatch)
+        self.assertTrue(
+            result.equals(expected), f"actual: {result}\n expected:{expected}"
+        )
+        if serialized_schema:
+            self.assertTrue(expected.schema.equals(coder.ArrowSchema()))
+
+    @parameterized.named_parameters(*_INVALID_INPUT_CASES)
+    def test_invalid_input(
+        self, schema_text_proto, examples_text_proto, error, error_msg_regex
+    ):
+        serialized_examples = [
+            text_format.Parse(pbtxt, tf.train.Example()).SerializeToString()
+            for pbtxt in examples_text_proto
+        ]
+        serialized_schema = None
+        if schema_text_proto is not None:
+            serialized_schema = text_format.Parse(
+                schema_text_proto, schema_pb2.Schema()
+            ).SerializeToString()
+
+        if serialized_schema:
+            coder = example_coder.ExamplesToRecordBatchDecoder(serialized_schema)
+        else:
+            coder = example_coder.ExamplesToRecordBatchDecoder()
+
+        with self.assertRaisesRegex(error, error_msg_regex):
+            coder.DecodeBatch(serialized_examples)
+
+    def test_arrow_schema_not_available_if_tfmd_schema_not_available(self):
+        coder = example_coder.ExamplesToRecordBatchDecoder()
+        with self.assertRaisesRegex(RuntimeError, "Unable to get the arrow schema"):
+            _ = coder.ArrowSchema()
+
+    def test_invalid_feature_type(self):
+        serialized_schema = text_format.Parse(
+            """
         feature {
           name: "a"
           type: STRUCT
         }
-        """, schema_pb2.Schema()).SerializeToString()
-    with self.assertRaisesRegex(RuntimeError,
-                                "Bad field type for feature: a.*"):
-      _ = example_coder.ExamplesToRecordBatchDecoder(serialized_schema)
+        """,
+            schema_pb2.Schema(),
+        ).SerializeToString()
+        with self.assertRaisesRegex(RuntimeError, "Bad field type for feature: a.*"):
+            _ = example_coder.ExamplesToRecordBatchDecoder(serialized_schema)
 
 
 _ENCODE_TEST_EXAMPLES = [
@@ -316,67 +356,83 @@ def test_invalid_feature_type(self):
 
 _ENCODE_CASES = [
     dict(
-        record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[b"a", b"b"], None, None, []],
-                     type=pa.large_list(pa.large_binary())),
-            pa.array([[1.0, 2.0], None, None, []], type=pa.list_(pa.float32())),
-            pa.array([[4, 5], None, None, []], type=pa.large_list(pa.int64()))
-        ], ["x", "y", "z"]),
-        examples_text_proto=_ENCODE_TEST_EXAMPLES),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[b"a", b"b"], None, None, []],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.array([[1.0, 2.0], None, None, []], type=pa.list_(pa.float32())),
+                pa.array([[4, 5], None, None, []], type=pa.large_list(pa.int64())),
+            ],
+            ["x", "y", "z"],
+        ),
+        examples_text_proto=_ENCODE_TEST_EXAMPLES,
+    ),
     dict(
-        record_batch=pa.RecordBatch.from_arrays([
-            pa.array([None, None, [b"a", b"b"]],
-                     type=pa.large_list(pa.binary())),
-            pa.array([None, None, [1.0, 2.0]], type=pa.large_list(
-                pa.float32())),
-            pa.array([None, None, [4, 5]], type=pa.list_(pa.int64()))
-        ], ["x", "y", "z"]),
-        examples_text_proto=list(reversed(_ENCODE_TEST_EXAMPLES[:-1]))),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None, None, [b"a", b"b"]], type=pa.large_list(pa.binary())),
+                pa.array([None, None, [1.0, 2.0]], type=pa.large_list(pa.float32())),
+                pa.array([None, None, [4, 5]], type=pa.list_(pa.int64())),
+            ],
+            ["x", "y", "z"],
+        ),
+        examples_text_proto=list(reversed(_ENCODE_TEST_EXAMPLES[:-1])),
+    ),
 ]
 
 _INVALID_ENCODE_TYPE_CASES = [
     dict(
         record_batch=pa.RecordBatch.from_arrays([pa.array([1, 2, 3])], ["a"]),
         error=RuntimeError,
-        error_msg_regex="Expected ListArray or LargeListArray"),
+        error_msg_regex="Expected ListArray or LargeListArray",
+    ),
     dict(
         record_batch=pa.RecordBatch.from_arrays(
-            [pa.array([[True], [False]], type=pa.large_list(pa.bool_()))],
-            ["a"]),
+            [pa.array([[True], [False]], type=pa.large_list(pa.bool_()))], ["a"]
+        ),
         error=RuntimeError,
-        error_msg_regex="Bad field type"),
+        error_msg_regex="Bad field type",
+    ),
     dict(
-        record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[b"a", b"b"], None, None, []],
-                     type=pa.large_list(pa.large_binary())),
-            pa.array([[1.0, 2.0], None, None, []],
-                     type=pa.large_list(pa.float32())),
-        ], ["x", "x"]),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[b"a", b"b"], None, None, []],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.array(
+                    [[1.0, 2.0], None, None, []], type=pa.large_list(pa.float32())
+                ),
+            ],
+            ["x", "x"],
+        ),
         error=RuntimeError,
-        error_msg_regex="RecordBatch contains duplicate column names")
+        error_msg_regex="RecordBatch contains duplicate column names",
+    ),
 ]
 
 
 class RecordBatchToExamplesTest(parameterized.TestCase):
-
-  @parameterized.parameters(*_ENCODE_CASES)
-  def test_encode(self, record_batch, examples_text_proto):
-    expected_examples = [
-        text_format.Parse(pbtxt, tf.train.Example())
-        for pbtxt in examples_text_proto
-    ]
-    coder = example_coder.RecordBatchToExamplesEncoder()
-    actual_examples = [
-        tf.train.Example.FromString(encoded)
-        for encoded in coder.encode(record_batch)
-    ]
-
-    self.assertEqual(actual_examples, expected_examples)
-
-  @parameterized.parameters(*_INVALID_ENCODE_TYPE_CASES)
-  def test_invalid_input(self, record_batch, error, error_msg_regex):
-    with self.assertRaisesRegex(error, error_msg_regex):
-      example_coder.RecordBatchToExamplesEncoder().encode(record_batch)
+    @parameterized.parameters(*_ENCODE_CASES)
+    def test_encode(self, record_batch, examples_text_proto):
+        expected_examples = [
+            text_format.Parse(pbtxt, tf.train.Example())
+            for pbtxt in examples_text_proto
+        ]
+        coder = example_coder.RecordBatchToExamplesEncoder()
+        actual_examples = [
+            tf.train.Example.FromString(encoded)
+            for encoded in coder.encode(record_batch)
+        ]
+
+        self.assertEqual(actual_examples, expected_examples)
+
+    @parameterized.parameters(*_INVALID_ENCODE_TYPE_CASES)
+    def test_invalid_input(self, record_batch, error, error_msg_regex):
+        with self.assertRaisesRegex(error, error_msg_regex):
+            example_coder.RecordBatchToExamplesEncoder().encode(record_batch)
 
 
 _ENCODE_NESTED_TEST_EXAMPLES = [
@@ -581,9 +637,7 @@ def test_invalid_input(self, record_batch, error, error_msg_regex):
             ["x", "y"],
         ),
         error=RuntimeError,
-        error_msg_regex=(
-            "conflicts with another source column in the same batch."
-        ),
+        error_msg_regex=("conflicts with another source column in the same batch."),
         schema=text_format.Parse(
             """
             tensor_representation_group {
@@ -668,74 +722,68 @@ def test_invalid_input(self, record_batch, error, error_msg_regex):
 ]
 
 
-class RecordBatchToExamplesEncoderTest(
-    parameterized.TestCase, tf.test.TestCase
-):
-
-  @parameterized.parameters(*(_ENCODE_CASES + _ENCODE_NESTED_CASES))
-  def test_encode(self, record_batch, examples_text_proto, schema=None):
-    expected_examples = [
-        text_format.Parse(pbtxt, tf.train.Example())
-        for pbtxt in examples_text_proto
-    ]
-    coder = example_coder.RecordBatchToExamplesEncoder(schema)
-    # Verify that coder can be properly pickled and unpickled.
-    coder = pickle.loads(pickle.dumps(coder))
-    encoded = coder.encode(record_batch)
-    self.assertLen(encoded, len(expected_examples))
-    for idx, (expected, actual) in enumerate(zip(expected_examples, encoded)):
-      self.assertProtoEquals(
-          expected,
-          tf.train.Example.FromString(actual),
-          msg=f" at position {idx}",
-      )
-
-  @parameterized.parameters(*(_INVALID_ENCODE_TYPE_CASES +
-                              _INVALID_ENCODE_NESTED_TYPE_CASES))
-  def test_invalid_input(self,
-                         record_batch,
-                         error,
-                         error_msg_regex,
-                         schema=None):
-    schema = (schema or schema_pb2.Schema())
-    coder = example_coder.RecordBatchToExamplesEncoder(schema)
-    with self.assertRaisesRegex(error, error_msg_regex):
-      coder.encode(record_batch)
-
-  def test_encode_is_consistent_with_parse_example(self):
-    coder = example_coder.RecordBatchToExamplesEncoder(_ENCODE_NESTED_SCHEMA)
-    encoded = tf.constant(coder.encode(_ENCODE_NESTED_RECORD_BATCH))
-    tensor_representations = (
-        tensor_representation_util.GetTensorRepresentationsFromSchema(
-            _ENCODE_NESTED_SCHEMA
-        )
+class RecordBatchToExamplesEncoderTest(parameterized.TestCase, tf.test.TestCase):
+    @parameterized.parameters(*(_ENCODE_CASES + _ENCODE_NESTED_CASES))
+    def test_encode(self, record_batch, examples_text_proto, schema=None):
+        expected_examples = [
+            text_format.Parse(pbtxt, tf.train.Example())
+            for pbtxt in examples_text_proto
+        ]
+        coder = example_coder.RecordBatchToExamplesEncoder(schema)
+        # Verify that coder can be properly pickled and unpickled.
+        coder = pickle.loads(pickle.dumps(coder))
+        encoded = coder.encode(record_batch)
+        self.assertLen(encoded, len(expected_examples))
+        for idx, (expected, actual) in enumerate(zip(expected_examples, encoded)):
+            self.assertProtoEquals(
+                expected,
+                tf.train.Example.FromString(actual),
+                msg=f" at position {idx}",
+            )
+
+    @parameterized.parameters(
+        *(_INVALID_ENCODE_TYPE_CASES + _INVALID_ENCODE_NESTED_TYPE_CASES)
     )
-    dtypes = {
-        "x": schema_pb2.FeatureType.BYTES,
-        "y": schema_pb2.FeatureType.FLOAT,
-        "z": schema_pb2.FeatureType.INT,
-    }
-    feature_spec = {
-        name: tensor_representation_util.CreateTfExampleParserConfig(
-            representation, dtypes[name]
+    def test_invalid_input(self, record_batch, error, error_msg_regex, schema=None):
+        schema = schema or schema_pb2.Schema()
+        coder = example_coder.RecordBatchToExamplesEncoder(schema)
+        with self.assertRaisesRegex(error, error_msg_regex):
+            coder.encode(record_batch)
+
+    def test_encode_is_consistent_with_parse_example(self):
+        coder = example_coder.RecordBatchToExamplesEncoder(_ENCODE_NESTED_SCHEMA)
+        encoded = tf.constant(coder.encode(_ENCODE_NESTED_RECORD_BATCH))
+        tensor_representations = (
+            tensor_representation_util.GetTensorRepresentationsFromSchema(
+                _ENCODE_NESTED_SCHEMA
+            )
         )
-        for name, representation in tensor_representations.items()
-    }
-    decoded = tf.io.parse_example(encoded, feature_spec)
-    expected_values = {
-        "x": [[[b"a", b"b"]], [], [], []],
-        "y": [[[[1.0, 2.0]]], [[[3.0, 4.0]]], [], [[]]],
-        "z": [[[[[4], [5]]]], [], [[[[6], []]]], [[[[], []]]]],
-    }
-    expected_ragged_ranks = {"x": 1, "y": 2, "z": 4}
-    self.assertLen(decoded, len(expected_values))
-    for name, expected in expected_values.items():
-      actual = decoded[name]
-      self.assertEqual(actual.to_list(), expected, msg=f"For {name}")
-      self.assertEqual(
-          actual.ragged_rank, expected_ragged_ranks[name], msg=f"For {name}"
-      )
+        dtypes = {
+            "x": schema_pb2.FeatureType.BYTES,
+            "y": schema_pb2.FeatureType.FLOAT,
+            "z": schema_pb2.FeatureType.INT,
+        }
+        feature_spec = {
+            name: tensor_representation_util.CreateTfExampleParserConfig(
+                representation, dtypes[name]
+            )
+            for name, representation in tensor_representations.items()
+        }
+        decoded = tf.io.parse_example(encoded, feature_spec)
+        expected_values = {
+            "x": [[[b"a", b"b"]], [], [], []],
+            "y": [[[[1.0, 2.0]]], [[[3.0, 4.0]]], [], [[]]],
+            "z": [[[[[4], [5]]]], [], [[[[6], []]]], [[[[], []]]]],
+        }
+        expected_ragged_ranks = {"x": 1, "y": 2, "z": 4}
+        self.assertLen(decoded, len(expected_values))
+        for name, expected in expected_values.items():
+            actual = decoded[name]
+            self.assertEqual(actual.to_list(), expected, msg=f"For {name}")
+            self.assertEqual(
+                actual.ragged_rank, expected_ragged_ranks[name], msg=f"For {name}"
+            )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/coders/example_numpy_decoder_test.py b/tfx_bsl/coders/example_numpy_decoder_test.py
index d60d0d15..bfd6b2cd 100644
--- a/tfx_bsl/coders/example_numpy_decoder_test.py
+++ b/tfx_bsl/coders/example_numpy_decoder_test.py
@@ -15,128 +15,126 @@
 
 import numpy as np
 import tensorflow as tf
-from tfx_bsl.coders import example_coder
-
+from absl.testing import absltest, parameterized
 from google.protobuf import text_format
-from absl.testing import absltest
-from absl.testing import parameterized
+
+from tfx_bsl.coders import example_coder
 
 _TF_EXAMPLE_DECODER_TESTS = [
     {
-        'testcase_name': 'empty_input',
-        'example_proto_text': '''features {}''',
-        'decoded_example': {}
+        "testcase_name": "empty_input",
+        "example_proto_text": """features {}""",
+        "decoded_example": {},
     },
     {
-        'testcase_name': 'int_feature_non_empty',
-        'example_proto_text': '''
+        "testcase_name": "int_feature_non_empty",
+        "example_proto_text": """
           features {
             feature {
               key: 'x'
               value { int64_list { value: [ 1, 2, 3 ] } }
             }
           }
-        ''',
-        'decoded_example': {'x': np.array([1, 2, 3], dtype=np.int64)}
+        """,
+        "decoded_example": {"x": np.array([1, 2, 3], dtype=np.int64)},
     },
     {
-        'testcase_name': 'float_feature_non_empty',
-        'example_proto_text': '''
+        "testcase_name": "float_feature_non_empty",
+        "example_proto_text": """
           features {
             feature {
               key: 'x'
               value { float_list { value: [ 4.0, 5.0 ] } }
             }
           }
-        ''',
-        'decoded_example': {'x': np.array([4.0, 5.0], dtype=np.float32)}
+        """,
+        "decoded_example": {"x": np.array([4.0, 5.0], dtype=np.float32)},
     },
     {
-        'testcase_name': 'str_feature_non_empty',
-        'example_proto_text': '''
+        "testcase_name": "str_feature_non_empty",
+        "example_proto_text": """
           features {
             feature {
               key: 'x'
               value { bytes_list { value: [ 'string', 'list' ] } }
             }
           }
-        ''',
-        'decoded_example': {'x': np.array([b'string', b'list'],
-                                          dtype=object)}
+        """,
+        "decoded_example": {"x": np.array([b"string", b"list"], dtype=object)},
     },
     {
-        'testcase_name': 'int_feature_empty',
-        'example_proto_text': '''
+        "testcase_name": "int_feature_empty",
+        "example_proto_text": """
           features {
             feature {
               key: 'x'
               value { int64_list { } }
             }
           }
-        ''',
-        'decoded_example': {'x': np.array([], dtype=np.int64)}
+        """,
+        "decoded_example": {"x": np.array([], dtype=np.int64)},
     },
     {
-        'testcase_name': 'float_feature_empty',
-        'example_proto_text': '''
+        "testcase_name": "float_feature_empty",
+        "example_proto_text": """
           features {
             feature {
               key: 'x'
               value { float_list { } }
             }
           }
-        ''',
-        'decoded_example': {'x': np.array([], dtype=np.float32)}
+        """,
+        "decoded_example": {"x": np.array([], dtype=np.float32)},
     },
     {
-        'testcase_name': 'str_feature_empty',
-        'example_proto_text': '''
+        "testcase_name": "str_feature_empty",
+        "example_proto_text": """
           features {
             feature {
               key: 'x'
               value { bytes_list { } }
             }
           }
-        ''',
-        'decoded_example': {'x': np.array([], dtype=object)}
+        """,
+        "decoded_example": {"x": np.array([], dtype=object)},
     },
     {
-        'testcase_name': 'feature_missing',
-        'example_proto_text': '''
+        "testcase_name": "feature_missing",
+        "example_proto_text": """
           features {
             feature {
               key: 'x'
               value { }
             }
           }
-        ''',
-        'decoded_example': {'x': None}
+        """,
+        "decoded_example": {"x": None},
     },
 ]
 
 
 class TFExampleDecoderTest(parameterized.TestCase):
-  """Tests for TFExampleDecoder."""
+    """Tests for TFExampleDecoder."""
 
-  def _check_decoding_results(self, actual, expected):
-    # Check that the numpy array dtypes match.
-    self.assertEqual(len(actual), len(expected))
-    for key in actual:
-      if expected[key] is None:
-        self.assertEqual(actual[key], None)
-      else:
-        self.assertEqual(actual[key].dtype, expected[key].dtype)
-        np.testing.assert_equal(actual, expected)
+    def _check_decoding_results(self, actual, expected):
+        # Check that the numpy array dtypes match.
+        self.assertEqual(len(actual), len(expected))
+        for key in actual:
+            if expected[key] is None:
+                self.assertEqual(actual[key], None)
+            else:
+                self.assertEqual(actual[key].dtype, expected[key].dtype)
+                np.testing.assert_equal(actual, expected)
 
-  @parameterized.named_parameters(
-      *_TF_EXAMPLE_DECODER_TESTS)
-  def test_decode_example(self, example_proto_text, decoded_example):
-    example = tf.train.Example()
-    text_format.Merge(example_proto_text, example)
-    self._check_decoding_results(
-        example_coder.ExampleToNumpyDict(example.SerializeToString()),
-        decoded_example)
+    @parameterized.named_parameters(*_TF_EXAMPLE_DECODER_TESTS)
+    def test_decode_example(self, example_proto_text, decoded_example):
+        example = tf.train.Example()
+        text_format.Merge(example_proto_text, example)
+        self._check_decoding_results(
+            example_coder.ExampleToNumpyDict(example.SerializeToString()),
+            decoded_example,
+        )
 
 
-if __name__ == '__main__':
-  absltest.main()
+if __name__ == "__main__":
+    absltest.main()
diff --git a/tfx_bsl/coders/sequence_example_coder.py b/tfx_bsl/coders/sequence_example_coder.py
index 3c6bc22c..962fc538 100644
--- a/tfx_bsl/coders/sequence_example_coder.py
+++ b/tfx_bsl/coders/sequence_example_coder.py
@@ -18,11 +18,14 @@
 # pylint: disable=g-import-not-at-top
 # See b/148667210 for why the ImportError is ignored.
 try:
-  from tfx_bsl.cc.tfx_bsl_extension.coders import SequenceExamplesToRecordBatchDecoder
+    from tfx_bsl.cc.tfx_bsl_extension.coders import SequenceExamplesToRecordBatchDecoder
 except ImportError:
-  import sys
-  sys.stderr.write("Error importing tfx_bsl_extension.coders. "
-                   "Some tfx_bsl functionalities are not available.")
+    import sys
+
+    sys.stderr.write(
+        "Error importing tfx_bsl_extension.coders. "
+        "Some tfx_bsl functionalities are not available."
+    )
 # pylint: enable=g-import-not-at-top
 # pytype: enable=import-error
 # pylint: enable=unused-import
diff --git a/tfx_bsl/coders/sequence_example_coder_test.py b/tfx_bsl/coders/sequence_example_coder_test.py
index 45f56243..993a25d1 100644
--- a/tfx_bsl/coders/sequence_example_coder_test.py
+++ b/tfx_bsl/coders/sequence_example_coder_test.py
@@ -15,13 +15,12 @@
 
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.coders import sequence_example_coder
-
+from absl.testing import absltest, parameterized
 from google.protobuf import text_format
-from absl.testing import absltest
-from absl.testing import parameterized
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.coders import sequence_example_coder
+
 _TEST_SEQUENCE_COLUMN_NAME = "##SEQUENCE##"
 _TYPED_SEQUENCE_EXAMPLE = """
     context {
@@ -325,37 +324,55 @@
         testcase_name="without_schema_first_example_typed",
         schema_text_proto=None,
         sequence_examples_text_proto=[
-            _TYPED_SEQUENCE_EXAMPLE, _UNTYPED_SEQUENCE_EXAMPLE,
+            _TYPED_SEQUENCE_EXAMPLE,
+            _UNTYPED_SEQUENCE_EXAMPLE,
             _SOME_FEATURES_TYPED_SEQUENCE_EXAMPLE,
-            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE
+            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE,
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([[1], None, None, []], type=pa.large_list(pa.int64())),
-            pa.array([[1.0, 2.0], None, None, []],
-                     type=pa.large_list(pa.float32())),
-            pa.array([[b"a", b"b", b"c"], None, None, []],
-                     type=pa.large_list(pa.large_binary())),
-            pa.array([None, None, None, None], pa.null()),
-            pa.array([None, None, [1.0], None],
-                     type=pa.large_list(pa.float32())),
-            pa.StructArray.from_arrays([
-                pa.array([None, None, [[1.0]], None],
-                         type=pa.large_list(pa.large_list(pa.float32()))),
-                pa.array([[[1, 2], [3]], [], [None, None, None], [[], []]],
-                         type=pa.large_list(pa.large_list(pa.int64()))),
-                pa.array([[[3.0, 4.0], [1.0, 2.0]], [], [None], [[]]],
-                         type=pa.large_list(pa.large_list(pa.float32()))),
-                pa.array([[[b"a", b"b"], [b"c"]], [], [None], [[]]],
-                         type=pa.large_list(pa.large_list(pa.large_binary())))
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], None, None, []], type=pa.large_list(pa.int64())),
+                pa.array(
+                    [[1.0, 2.0], None, None, []], type=pa.large_list(pa.float32())
+                ),
+                pa.array(
+                    [[b"a", b"b", b"c"], None, None, []],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.array([None, None, None, None], pa.null()),
+                pa.array([None, None, [1.0], None], type=pa.large_list(pa.float32())),
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [None, None, [[1.0]], None],
+                            type=pa.large_list(pa.large_list(pa.float32())),
+                        ),
+                        pa.array(
+                            [[[1, 2], [3]], [], [None, None, None], [[], []]],
+                            type=pa.large_list(pa.large_list(pa.int64())),
+                        ),
+                        pa.array(
+                            [[[3.0, 4.0], [1.0, 2.0]], [], [None], [[]]],
+                            type=pa.large_list(pa.large_list(pa.float32())),
+                        ),
+                        pa.array(
+                            [[[b"a", b"b"], [b"c"]], [], [None], [[]]],
+                            type=pa.large_list(pa.large_list(pa.large_binary())),
+                        ),
+                    ],
+                    names=["sequence_v", "sequence_x", "sequence_y", "sequence_z"],
+                ),
+            ],
+            [
+                "context_a",
+                "context_b",
+                "context_c",
+                "context_d",
+                "context_e",
+                _TEST_SEQUENCE_COLUMN_NAME,
             ],
-                                       names=[
-                                           "sequence_v", "sequence_x",
-                                           "sequence_y", "sequence_z"
-                                       ])
-        ], [
-            "context_a", "context_b", "context_c", "context_d", "context_e",
-            _TEST_SEQUENCE_COLUMN_NAME
-        ])),
+        ),
+    ),
     dict(
         testcase_name="with_schema_first_example_typed",
         schema_text_proto="""
@@ -390,64 +407,95 @@
           }
         }""",
         sequence_examples_text_proto=[
-            _TYPED_SEQUENCE_EXAMPLE, _UNTYPED_SEQUENCE_EXAMPLE,
+            _TYPED_SEQUENCE_EXAMPLE,
+            _UNTYPED_SEQUENCE_EXAMPLE,
             _SOME_FEATURES_TYPED_SEQUENCE_EXAMPLE,
-            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE
+            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE,
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([[1], None, None, []], type=pa.large_list(pa.int64())),
-            pa.array([[1.0, 2.0], None, None, []],
-                     type=pa.large_list(pa.float32())),
-            pa.array([[b"a", b"b", b"c"], None, None, []],
-                     type=pa.large_list(pa.large_binary())),
-            pa.StructArray.from_arrays([
-                pa.array([[[1, 2], [3]], [], [None, None, None], [[], []]],
-                         type=pa.large_list(pa.large_list(pa.int64()))),
-                pa.array([[[3.0, 4.0], [1.0, 2.0]], [], [None], [[]]],
-                         type=pa.large_list(pa.large_list(pa.float32()))),
-                pa.array([[[b"a", b"b"], [b"c"]], [], [None], [[]]],
-                         type=pa.large_list(pa.large_list(pa.large_binary())))
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], None, None, []], type=pa.large_list(pa.int64())),
+                pa.array(
+                    [[1.0, 2.0], None, None, []], type=pa.large_list(pa.float32())
+                ),
+                pa.array(
+                    [[b"a", b"b", b"c"], None, None, []],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [[[1, 2], [3]], [], [None, None, None], [[], []]],
+                            type=pa.large_list(pa.large_list(pa.int64())),
+                        ),
+                        pa.array(
+                            [[[3.0, 4.0], [1.0, 2.0]], [], [None], [[]]],
+                            type=pa.large_list(pa.large_list(pa.float32())),
+                        ),
+                        pa.array(
+                            [[[b"a", b"b"], [b"c"]], [], [None], [[]]],
+                            type=pa.large_list(pa.large_list(pa.large_binary())),
+                        ),
+                    ],
+                    names=["sequence_x", "sequence_y", "sequence_z"],
+                ),
             ],
-                                       names=[
-                                           "sequence_x", "sequence_y",
-                                           "sequence_z"
-                                       ])
-        ], ["context_a", "context_b", "context_c", _TEST_SEQUENCE_COLUMN_NAME
-           ])),
+            ["context_a", "context_b", "context_c", _TEST_SEQUENCE_COLUMN_NAME],
+        ),
+    ),
     dict(
         testcase_name="without_schema_untyped_then_typed_examples",
         schema_text_proto=None,
         sequence_examples_text_proto=[
-            _UNTYPED_SEQUENCE_EXAMPLE, _SOME_FEATURES_TYPED_SEQUENCE_EXAMPLE,
-            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE, _TYPED_SEQUENCE_EXAMPLE
+            _UNTYPED_SEQUENCE_EXAMPLE,
+            _SOME_FEATURES_TYPED_SEQUENCE_EXAMPLE,
+            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE,
+            _TYPED_SEQUENCE_EXAMPLE,
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None, None, [], [1]], type=pa.large_list(pa.int64())),
-            pa.array([None, None, [], [1.0, 2.0]],
-                     type=pa.large_list(pa.float32())),
-            pa.array([None, None, [], [b"a", b"b", b"c"]],
-                     type=pa.large_list(pa.large_binary())),
-            pa.array([None, None, None, None], pa.null()),
-            pa.array([None, [1.0], None, None],
-                     type=pa.large_list(pa.float32())),
-            pa.StructArray.from_arrays([
-                pa.array([None, [[1.0]], None, None],
-                         type=pa.large_list(pa.large_list(pa.float32()))),
-                pa.array([[], [None, None, None], [[], []], [[1, 2], [3]]],
-                         type=pa.large_list(pa.large_list(pa.int64()))),
-                pa.array([[], [None], [[]], [[3.0, 4.0], [1.0, 2.0]]],
-                         type=pa.large_list(pa.large_list(pa.float32()))),
-                pa.array([[], [None], [[]], [[b"a", b"b"], [b"c"]]],
-                         type=pa.large_list(pa.large_list(pa.large_binary())))
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None, None, [], [1]], type=pa.large_list(pa.int64())),
+                pa.array(
+                    [None, None, [], [1.0, 2.0]], type=pa.large_list(pa.float32())
+                ),
+                pa.array(
+                    [None, None, [], [b"a", b"b", b"c"]],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.array([None, None, None, None], pa.null()),
+                pa.array([None, [1.0], None, None], type=pa.large_list(pa.float32())),
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [None, [[1.0]], None, None],
+                            type=pa.large_list(pa.large_list(pa.float32())),
+                        ),
+                        pa.array(
+                            [[], [None, None, None], [[], []], [[1, 2], [3]]],
+                            type=pa.large_list(pa.large_list(pa.int64())),
+                        ),
+                        pa.array(
+                            [[], [None], [[]], [[3.0, 4.0], [1.0, 2.0]]],
+                            type=pa.large_list(pa.large_list(pa.float32())),
+                        ),
+                        pa.array(
+                            [[], [None], [[]], [[b"a", b"b"], [b"c"]]],
+                            type=pa.large_list(pa.large_list(pa.large_binary())),
+                        ),
+                    ],
+                    names=["sequence_v", "sequence_x", "sequence_y", "sequence_z"],
+                ),
+            ],
+            [
+                "context_a",
+                "context_b",
+                "context_c",
+                "context_d",
+                "context_e",
+                _TEST_SEQUENCE_COLUMN_NAME,
             ],
-                                       names=[
-                                           "sequence_v", "sequence_x",
-                                           "sequence_y", "sequence_z"
-                                       ])
-        ], [
-            "context_a", "context_b", "context_c", "context_d", "context_e",
-            _TEST_SEQUENCE_COLUMN_NAME
-        ])),
+        ),
+    ),
     dict(
         testcase_name="with_schema_untyped_then_typed_examples",
         schema_text_proto="""
@@ -482,50 +530,72 @@
           }
         }""",
         sequence_examples_text_proto=[
-            _UNTYPED_SEQUENCE_EXAMPLE, _SOME_FEATURES_TYPED_SEQUENCE_EXAMPLE,
-            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE, _TYPED_SEQUENCE_EXAMPLE
+            _UNTYPED_SEQUENCE_EXAMPLE,
+            _SOME_FEATURES_TYPED_SEQUENCE_EXAMPLE,
+            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE,
+            _TYPED_SEQUENCE_EXAMPLE,
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None, None, [], [1]], type=pa.large_list(pa.int64())),
-            pa.array([None, None, [], [1.0, 2.0]],
-                     type=pa.large_list(pa.float32())),
-            pa.array([None, None, [], [b"a", b"b", b"c"]],
-                     type=pa.large_list(pa.large_binary())),
-            pa.StructArray.from_arrays([
-                pa.array([[], [None, None, None], [[], []], [[1, 2], [3]]],
-                         type=pa.large_list(pa.large_list(pa.int64()))),
-                pa.array([[], [None], [[]], [[3.0, 4.0], [1.0, 2.0]]],
-                         type=pa.large_list(pa.large_list(pa.float32()))),
-                pa.array([[], [None], [[]], [[b"a", b"b"], [b"c"]]],
-                         type=pa.large_list(pa.large_list(pa.large_binary())))
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None, None, [], [1]], type=pa.large_list(pa.int64())),
+                pa.array(
+                    [None, None, [], [1.0, 2.0]], type=pa.large_list(pa.float32())
+                ),
+                pa.array(
+                    [None, None, [], [b"a", b"b", b"c"]],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [[], [None, None, None], [[], []], [[1, 2], [3]]],
+                            type=pa.large_list(pa.large_list(pa.int64())),
+                        ),
+                        pa.array(
+                            [[], [None], [[]], [[3.0, 4.0], [1.0, 2.0]]],
+                            type=pa.large_list(pa.large_list(pa.float32())),
+                        ),
+                        pa.array(
+                            [[], [None], [[]], [[b"a", b"b"], [b"c"]]],
+                            type=pa.large_list(pa.large_list(pa.large_binary())),
+                        ),
+                    ],
+                    names=["sequence_x", "sequence_y", "sequence_z"],
+                ),
             ],
-                                       names=[
-                                           "sequence_x", "sequence_y",
-                                           "sequence_z"
-                                       ])
-        ], ["context_a", "context_b", "context_c", _TEST_SEQUENCE_COLUMN_NAME
-           ])),
+            ["context_a", "context_b", "context_c", _TEST_SEQUENCE_COLUMN_NAME],
+        ),
+    ),
     dict(
         testcase_name="without_schema_no_typed_examples",
         schema_text_proto=None,
         sequence_examples_text_proto=_TEST_SEQUENCE_EXAMPLES_NONE_TYPED,
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None, None], type=pa.null()),
-            pa.array([None, None], type=pa.null()),
-            pa.array([None, None], type=pa.null()),
-            pa.array([None, None], type=pa.null()),
-            pa.StructArray.from_arrays([
-                pa.array([None, [None]], type=pa.large_list(pa.null())),
-                pa.array([[], [None]], type=pa.large_list(pa.null())),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None, None], type=pa.null()),
+                pa.array([None, None], type=pa.null()),
+                pa.array([None, None], type=pa.null()),
+                pa.array([None, None], type=pa.null()),
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array([None, [None]], type=pa.large_list(pa.null())),
+                        pa.array([[], [None]], type=pa.large_list(pa.null())),
+                    ],
+                    names=[
+                        "sequence_w",
+                        "sequence_x",
+                    ],
+                ),
+            ],
+            [
+                "context_a",
+                "context_b",
+                "context_c",
+                "context_d",
+                _TEST_SEQUENCE_COLUMN_NAME,
             ],
-                                       names=[
-                                           "sequence_w",
-                                           "sequence_x",
-                                       ])
-        ], [
-            "context_a", "context_b", "context_c", "context_d",
-            _TEST_SEQUENCE_COLUMN_NAME
-        ])),
+        ),
+    ),
     dict(
         testcase_name="with_schema_no_typed_examples",
         schema_text_proto="""
@@ -560,24 +630,31 @@
           }
         }""",
         sequence_examples_text_proto=_TEST_SEQUENCE_EXAMPLES_NONE_TYPED,
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None, None], type=pa.large_list(pa.int64())),
-            pa.array([None, None], type=pa.large_list(pa.float32())),
-            pa.array([None, None], type=pa.large_list(pa.large_binary())),
-            pa.StructArray.from_arrays([
-                pa.array([[], [None]],
-                         type=pa.large_list(pa.large_list(pa.int64()))),
-                pa.array([None, None],
-                         type=pa.large_list(pa.large_list(pa.float32()))),
-                pa.array([None, None],
-                         type=pa.large_list(pa.large_list(pa.large_binary())))
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None, None], type=pa.large_list(pa.int64())),
+                pa.array([None, None], type=pa.large_list(pa.float32())),
+                pa.array([None, None], type=pa.large_list(pa.large_binary())),
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [[], [None]], type=pa.large_list(pa.large_list(pa.int64()))
+                        ),
+                        pa.array(
+                            [None, None],
+                            type=pa.large_list(pa.large_list(pa.float32())),
+                        ),
+                        pa.array(
+                            [None, None],
+                            type=pa.large_list(pa.large_list(pa.large_binary())),
+                        ),
+                    ],
+                    names=["sequence_x", "sequence_y", "sequence_z"],
+                ),
             ],
-                                       names=[
-                                           "sequence_x", "sequence_y",
-                                           "sequence_z"
-                                       ])
-        ], ["context_a", "context_b", "context_c", _TEST_SEQUENCE_COLUMN_NAME
-           ])),
+            ["context_a", "context_b", "context_c", _TEST_SEQUENCE_COLUMN_NAME],
+        ),
+    ),
     dict(
         testcase_name="build_nulls_for_unseen_feature",
         schema_text_proto="""
@@ -597,19 +674,29 @@
         }
         """,
         sequence_examples_text_proto=[
-            _TYPED_SEQUENCE_EXAMPLE, _UNTYPED_SEQUENCE_EXAMPLE,
+            _TYPED_SEQUENCE_EXAMPLE,
+            _UNTYPED_SEQUENCE_EXAMPLE,
             _SOME_FEATURES_TYPED_SEQUENCE_EXAMPLE,
-            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE
+            _EMPTY_VALUES_LIST_SEQUENCE_EXAMPLE,
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None, None, None, None],
-                     type=pa.large_list(pa.large_binary())),
-            pa.StructArray.from_arrays([
-                pa.array([None, None, None, None],
-                         type=pa.large_list(pa.large_list(pa.int64())))
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [None, None, None, None], type=pa.large_list(pa.large_binary())
+                ),
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [None, None, None, None],
+                            type=pa.large_list(pa.large_list(pa.int64())),
+                        )
+                    ],
+                    names=["sequence_u"],
+                ),
             ],
-                                       names=["sequence_u"]),
-        ], ["context_u", _TEST_SEQUENCE_COLUMN_NAME])),
+            ["context_u", _TEST_SEQUENCE_COLUMN_NAME],
+        ),
+    ),
     dict(
         testcase_name="build_null_for_unset_kind",
         schema_text_proto="""
@@ -636,12 +723,17 @@
         }
         """
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None], type=pa.large_list(pa.large_binary())),
-            pa.StructArray.from_arrays(
-                [pa.array([[]], type=pa.large_list(pa.large_list(pa.int64())))],
-                names=["sequence_a"]),
-        ], ["context_a", _TEST_SEQUENCE_COLUMN_NAME])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None], type=pa.large_list(pa.large_binary())),
+                pa.StructArray.from_arrays(
+                    [pa.array([[]], type=pa.large_list(pa.large_list(pa.int64())))],
+                    names=["sequence_a"],
+                ),
+            ],
+            ["context_a", _TEST_SEQUENCE_COLUMN_NAME],
+        ),
+    ),
     dict(
         testcase_name="schema_does_not_contain_sequence_feature",
         schema_text_proto="""
@@ -658,9 +750,13 @@
         }
         """
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None], type=pa.large_list(pa.large_binary())),
-        ], ["context_a"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None], type=pa.large_list(pa.large_binary())),
+            ],
+            ["context_a"],
+        ),
+    ),
     dict(
         testcase_name="duplicate_context_feature_names_in_schema",
         schema_text_proto="""
@@ -682,9 +778,13 @@
         }
         """
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([None], type=pa.large_list(pa.large_binary())),
-        ], ["context_a"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([None], type=pa.large_list(pa.large_binary())),
+            ],
+            ["context_a"],
+        ),
+    ),
     dict(
         testcase_name="duplicate_sequence_feature_names_in_schema",
         schema_text_proto="""
@@ -711,20 +811,31 @@
         }
         """
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.StructArray.from_arrays(
-                [pa.array([[]], type=pa.large_list(pa.large_list(pa.int64())))],
-                names=["sequence_a"]),
-        ], [_TEST_SEQUENCE_COLUMN_NAME])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [pa.array([[]], type=pa.large_list(pa.large_list(pa.int64())))],
+                    names=["sequence_a"],
+                ),
+            ],
+            [_TEST_SEQUENCE_COLUMN_NAME],
+        ),
+    ),
     dict(
         testcase_name="feature_lists_with_no_sequence_features",
         schema_text_proto=None,
-        sequence_examples_text_proto=["""
+        sequence_examples_text_proto=[
+            """
         feature_lists {}
-        """],
-        expected=pa.RecordBatch.from_arrays([
-            pa.StructArray.from_buffers(pa.struct([]), 1, [None]),
-        ], [_TEST_SEQUENCE_COLUMN_NAME])),
+        """
+        ],
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_buffers(pa.struct([]), 1, [None]),
+            ],
+            [_TEST_SEQUENCE_COLUMN_NAME],
+        ),
+    ),
     dict(
         testcase_name="without_schema_only_context_features",
         schema_text_proto=None,
@@ -742,9 +853,13 @@
         }
         """
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.array([[1, 2]], type=pa.large_list(pa.int64())),
-        ], ["context_a"])),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1, 2]], type=pa.large_list(pa.int64())),
+            ],
+            ["context_a"],
+        ),
+    ),
     dict(
         testcase_name="without_schema_only_sequence_features",
         schema_text_proto=None,
@@ -764,13 +879,20 @@
         }
         """
         ],
-        expected=pa.RecordBatch.from_arrays([
-            pa.StructArray.from_arrays([
-                pa.array([[[1, 2]]],
-                         type=pa.large_list(pa.large_list(pa.int64()))),
+        expected=pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [[[1, 2]]], type=pa.large_list(pa.large_list(pa.int64()))
+                        ),
+                    ],
+                    names=["sequence_x"],
+                )
             ],
-                                       names=["sequence_x"])
-        ], [_TEST_SEQUENCE_COLUMN_NAME])),
+            [_TEST_SEQUENCE_COLUMN_NAME],
+        ),
+    ),
 ]
 
 _INVALID_INPUT_CASES = [
@@ -790,7 +912,8 @@
         error=RuntimeError,
         error_msg_regex=(
             "Feature had wrong type, expected bytes_list, found float_list "
-            "for feature \"a\""),
+            'for feature "a"'
+        ),
     ),
     dict(
         testcase_name="sequence_feature_actual_type_mismatches_schema_type",
@@ -821,7 +944,8 @@
         error=RuntimeError,
         error_msg_regex=(
             "Feature had wrong type, expected bytes_list, found float_list "
-            "for sequence feature \"a\""),
+            'for sequence feature "a"'
+        ),
     ),
     dict(
         testcase_name="context_feature_no_schema_mixed_type",
@@ -829,14 +953,16 @@
         sequence_examples_text_proto=[
             """
         context { feature { key: "a" value { float_list { value: [] } } } }
-        """, """
+        """,
+            """
         context { feature { key: "a" value { int64_list { value: [] } } } }
-        """
+        """,
         ],
         error=RuntimeError,
         error_msg_regex=(
             "Feature had wrong type, expected float_list, found int64_list"
-            " for feature \"a\""),
+            ' for feature "a"'
+        ),
     ),
     dict(
         testcase_name="sequence_feature_no_schema_mixed_type",
@@ -851,7 +977,8 @@
             }
           }
         }
-        """, """
+        """,
+            """
         feature_lists {
           feature_list {
             key: 'a'
@@ -860,89 +987,95 @@
             }
           }
         }
-        """
+        """,
         ],
         error=RuntimeError,
         error_msg_regex=(
             "Feature had wrong type, expected float_list, found int64_list"
-            " for sequence feature \"a\""),
+            ' for sequence feature "a"'
+        ),
     ),
 ]
 
 
 class SequenceExamplesToRecordBatchDecoderTest(parameterized.TestCase):
+    @parameterized.named_parameters(*_DECODE_CASES)
+    def test_decode(self, schema_text_proto, sequence_examples_text_proto, expected):
+        serialized_sequence_examples = [
+            text_format.Parse(pbtxt, tf.train.SequenceExample()).SerializeToString()
+            for pbtxt in sequence_examples_text_proto
+        ]
+        serialized_schema = None
+        if schema_text_proto is not None:
+            serialized_schema = text_format.Parse(
+                schema_text_proto, schema_pb2.Schema()
+            ).SerializeToString()
 
-  @parameterized.named_parameters(*_DECODE_CASES)
-  def test_decode(self, schema_text_proto, sequence_examples_text_proto,
-                  expected):
-    serialized_sequence_examples = [
-        text_format.Parse(pbtxt,
-                          tf.train.SequenceExample()).SerializeToString()
-        for pbtxt in sequence_examples_text_proto
-    ]
-    serialized_schema = None
-    if schema_text_proto is not None:
-      serialized_schema = text_format.Parse(
-          schema_text_proto, schema_pb2.Schema()).SerializeToString()
-
-    if serialized_schema:
-      coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
-          _TEST_SEQUENCE_COLUMN_NAME,
-          serialized_schema)
-    else:
-      coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
-          _TEST_SEQUENCE_COLUMN_NAME)
+        if serialized_schema:
+            coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
+                _TEST_SEQUENCE_COLUMN_NAME, serialized_schema
+            )
+        else:
+            coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
+                _TEST_SEQUENCE_COLUMN_NAME
+            )
 
-    result = coder.DecodeBatch(serialized_sequence_examples)
-    self.assertIsInstance(result, pa.RecordBatch)
-    self.assertTrue(
-        result.equals(expected),
-        "actual: {}\n expected:{}".format(result, expected))
+        result = coder.DecodeBatch(serialized_sequence_examples)
+        self.assertIsInstance(result, pa.RecordBatch)
+        self.assertTrue(
+            result.equals(expected), f"actual: {result}\n expected:{expected}"
+        )
 
-    if serialized_schema is not None:
-      self.assertTrue(coder.ArrowSchema().equals(result.schema))
+        if serialized_schema is not None:
+            self.assertTrue(coder.ArrowSchema().equals(result.schema))
 
-  @parameterized.named_parameters(*_INVALID_INPUT_CASES)
-  def test_invalid_input(self, schema_text_proto, sequence_examples_text_proto,
-                         error, error_msg_regex):
-    serialized_sequence_examples = [
-        text_format.Parse(pbtxt,
-                          tf.train.SequenceExample()).SerializeToString()
-        for pbtxt in sequence_examples_text_proto
-    ]
-    serialized_schema = None
-    if schema_text_proto is not None:
-      serialized_schema = text_format.Parse(
-          schema_text_proto, schema_pb2.Schema()).SerializeToString()
+    @parameterized.named_parameters(*_INVALID_INPUT_CASES)
+    def test_invalid_input(
+        self, schema_text_proto, sequence_examples_text_proto, error, error_msg_regex
+    ):
+        serialized_sequence_examples = [
+            text_format.Parse(pbtxt, tf.train.SequenceExample()).SerializeToString()
+            for pbtxt in sequence_examples_text_proto
+        ]
+        serialized_schema = None
+        if schema_text_proto is not None:
+            serialized_schema = text_format.Parse(
+                schema_text_proto, schema_pb2.Schema()
+            ).SerializeToString()
 
-    if serialized_schema:
-      coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
-          _TEST_SEQUENCE_COLUMN_NAME, serialized_schema)
-    else:
-      coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
-          _TEST_SEQUENCE_COLUMN_NAME)
+        if serialized_schema:
+            coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
+                _TEST_SEQUENCE_COLUMN_NAME, serialized_schema
+            )
+        else:
+            coder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
+                _TEST_SEQUENCE_COLUMN_NAME
+            )
 
-    with self.assertRaisesRegex(error, error_msg_regex):
-      coder.DecodeBatch(serialized_sequence_examples)
+        with self.assertRaisesRegex(error, error_msg_regex):
+            coder.DecodeBatch(serialized_sequence_examples)
 
-  def test_sequence_feature_column_name_not_struct_in_schema(self):
-    schema_text_proto = """
+    def test_sequence_feature_column_name_not_struct_in_schema(self):
+        schema_text_proto = """
         feature {
           name: "##SEQUENCE##"
           type: INT
         }
         """
-    serialized_schema = text_format.Parse(
-        schema_text_proto, schema_pb2.Schema()).SerializeToString()
+        serialized_schema = text_format.Parse(
+            schema_text_proto, schema_pb2.Schema()
+        ).SerializeToString()
 
-    error_msg_regex = (
-        "Found a feature in the schema with the sequence_feature_column_name "
-        r"\(i.e., ##SEQUENCE##\) that is not a struct.*")
+        error_msg_regex = (
+            "Found a feature in the schema with the sequence_feature_column_name "
+            r"\(i.e., ##SEQUENCE##\) that is not a struct.*"
+        )
 
-    with self.assertRaisesRegex(RuntimeError, error_msg_regex):
-      sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
-          _TEST_SEQUENCE_COLUMN_NAME, serialized_schema)
+        with self.assertRaisesRegex(RuntimeError, error_msg_regex):
+            sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
+                _TEST_SEQUENCE_COLUMN_NAME, serialized_schema
+            )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/coders/tf_graph_record_decoder.py b/tfx_bsl/coders/tf_graph_record_decoder.py
index 8de4ddd0..8d78ebeb 100644
--- a/tfx_bsl/coders/tf_graph_record_decoder.py
+++ b/tfx_bsl/coders/tf_graph_record_decoder.py
@@ -17,9 +17,9 @@
 from typing import Dict, Optional, Union
 
 import tensorflow as tf
-
-from tensorflow.python.framework import composite_tensor  # pylint: disable=g-direct-tensorflow-import
-
+from tensorflow.python.framework import (
+    composite_tensor,  # pylint: disable=g-direct-tensorflow-import
+)
 
 TensorAlike = Union[tf.Tensor, composite_tensor.CompositeTensor]
 
@@ -28,167 +28,179 @@
 
 
 class TFGraphRecordDecoder(metaclass=abc.ABCMeta):
-  """Base class for decoders that turns a list of bytes to (composite) tensors.
-
-  Sub-classes must implement `decode_record()` (see its docstring
-  for requirements).
-
-  Decoder instances can be saved as a SavedModel by `save_decoder()`.
-  The SavedModel can be loaded back by `load_decoder()`. However, the loaded
-  decoder will always be of the type `LoadedDecoder` and only have the public
-  interfaces listed in this base class available.
-  """
-
-  def output_type_specs(self) -> Dict[str, tf.TypeSpec]:
-    """Returns the tf.TypeSpecs of the decoded tensors.
-
-    Returns:
-      A dict whose keys are the same as keys of the dict returned by
-      `decode_record()` and values are the tf.TypeSpec of the corresponding
-      (composite) tensor.
-    """
-    return {
-        k: tf.type_spec_from_value(v) for k, v in
-        self._make_concrete_decode_function().structured_outputs.items()
-    }
-
-  @abc.abstractmethod
-  def decode_record(self, records: tf.Tensor) -> Dict[str, TensorAlike]:
-    """Sub-classes should implement this.
-
-    Implementations must use TF ops to derive the result (composite) tensors, as
-    this function will be traced and become a tf.function (thus a TF Graph).
-    Note that autograph is not enabled in such tracing, which means any python
-    control flow / loops will not be converted to TF cond / loops automatically.
-
-    The returned tensors must be batch-aligned (i.e. they should be at least
-    of rank 1, and their outer-most dimensions must be of the same size). They
-    do not have to be batch-aligned with the input tensor, but if that's the
-    case, an additional tensor must be provided among the results, to indicate
-    which input record a "row" in the output batch comes from. See
-    `record_index_tensor_name` for more details.
-
-    Args:
-      records: a 1-D string tensor that contains the records to be decoded.
-
-    Returns:
-      A dict of (composite) tensors.
-    """
-
-  @property
-  def record_index_tensor_name(self) -> Optional[str]:
-    """The name of the tensor indicating which record a slice is from.
-
-    The decoded tensors are batch-aligned among themselves, but they don't
-    necessarily have to be batch-aligned with the input records. If not,
-    sub-classes should implement this method to tie the batch dimension
-    with the input record.
-
-    The record index tensor must be a SparseTensor or a RaggedTensor of integral
-    type, and must be 2-D and must not contain "missing" values.
+    """Base class for decoders that turns a list of bytes to (composite) tensors.
 
-    A record index tensor like the following:
-    [[0], [0], [2]]
-    means that of 3 "rows" in the output "batch", the first two rows came
-    from the first record, and the 3rd row came from the third record.
+    Sub-classes must implement `decode_record()` (see its docstring
+    for requirements).
 
-    The name must not be an empty string.
-
-    Returns:
-      The name of the record index tensor.
+    Decoder instances can be saved as a SavedModel by `save_decoder()`.
+    The SavedModel can be loaded back by `load_decoder()`. However, the loaded
+    decoder will always be of the type `LoadedDecoder` and only have the public
+    interfaces listed in this base class available.
     """
-    return None
 
-  def _make_concrete_decode_function(self):
-    return (
-        tf.function(
+    def output_type_specs(self) -> Dict[str, tf.TypeSpec]:
+        """Returns the tf.TypeSpecs of the decoded tensors.
+
+        Returns
+        -------
+          A dict whose keys are the same as keys of the dict returned by
+          `decode_record()` and values are the tf.TypeSpec of the corresponding
+          (composite) tensor.
+        """
+        return {
+            k: tf.type_spec_from_value(v)
+            for k, v in self._make_concrete_decode_function().structured_outputs.items()
+        }
+
+    @abc.abstractmethod
+    def decode_record(self, records: tf.Tensor) -> Dict[str, TensorAlike]:
+        """Sub-classes should implement this.
+
+        Implementations must use TF ops to derive the result (composite) tensors, as
+        this function will be traced and become a tf.function (thus a TF Graph).
+        Note that autograph is not enabled in such tracing, which means any python
+        control flow / loops will not be converted to TF cond / loops automatically.
+
+        The returned tensors must be batch-aligned (i.e. they should be at least
+        of rank 1, and their outer-most dimensions must be of the same size). They
+        do not have to be batch-aligned with the input tensor, but if that's the
+        case, an additional tensor must be provided among the results, to indicate
+        which input record a "row" in the output batch comes from. See
+        `record_index_tensor_name` for more details.
+
+        Args:
+        ----
+          records: a 1-D string tensor that contains the records to be decoded.
+
+        Returns:
+        -------
+          A dict of (composite) tensors.
+        """
+
+    @property
+    def record_index_tensor_name(self) -> Optional[str]:
+        """The name of the tensor indicating which record a slice is from.
+
+        The decoded tensors are batch-aligned among themselves, but they don't
+        necessarily have to be batch-aligned with the input records. If not,
+        sub-classes should implement this method to tie the batch dimension
+        with the input record.
+
+        The record index tensor must be a SparseTensor or a RaggedTensor of integral
+        type, and must be 2-D and must not contain "missing" values.
+
+        A record index tensor like the following:
+        [[0], [0], [2]]
+        means that of 3 "rows" in the output "batch", the first two rows came
+        from the first record, and the 3rd row came from the third record.
+
+        The name must not be an empty string.
+
+        Returns
+        -------
+          The name of the record index tensor.
+        """
+        return None
+
+    def _make_concrete_decode_function(self):
+        return tf.function(
             self.decode_record,
             input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)],
-            autograph=False)
-        .get_concrete_function())
+            autograph=False,
+        ).get_concrete_function()
 
-  def save(self, path: str) -> None:
-    """Saves this TFGraphRecordDecoder to a SavedModel at `path`.
+    def save(self, path: str) -> None:
+        """Saves this TFGraphRecordDecoder to a SavedModel at `path`.
 
-    This functions the same as `tf_graph_record_decoder.save_decoder()`. This is
-    provided purely for convenience, and should not impact the actual saved
-    model, since only the `tf.function` from `_make_concrete_decode_function` is
-    saved.
+        This functions the same as `tf_graph_record_decoder.save_decoder()`. This is
+        provided purely for convenience, and should not impact the actual saved
+        model, since only the `tf.function` from `_make_concrete_decode_function` is
+        saved.
 
-    Args:
-      path: The path to where the saved_model is saved.
-    """
-    save_decoder(self, path)
+        Args:
+        ----
+          path: The path to where the saved_model is saved.
+        """
+        save_decoder(self, path)
 
 
-class LoadedDecoder(object):
-  """A decoder recovered from a SavedModel.
+class LoadedDecoder:
+    """A decoder recovered from a SavedModel.
 
-  It has all the public interfaces of a TFGraphRecordDecoder.
-  """
+    It has all the public interfaces of a TFGraphRecordDecoder.
+    """
 
-  def __init__(self, loaded_module: tf.Module):
-    self._decode_fun = loaded_module.decode_fun
-    self._record_index_tensor_name = None
+    def __init__(self, loaded_module: tf.Module):
+        self._decode_fun = loaded_module.decode_fun
+        self._record_index_tensor_name = None
 
-    if hasattr(loaded_module, "signatures"):
-      for signature_name in loaded_module.signatures.keys():
-        if signature_name.startswith(
-            _RECORD_INDEX_TENSOR_NAME_SIGNATURE_PREFIX):
-          record_index_tensor_name = signature_name[
-              len(_RECORD_INDEX_TENSOR_NAME_SIGNATURE_PREFIX):]
-          assert record_index_tensor_name, (
-              "Invalid (empty) record_index_tensor_name")
-          self._record_index_tensor_name = record_index_tensor_name
+        if hasattr(loaded_module, "signatures"):
+            for signature_name in loaded_module.signatures.keys():
+                if signature_name.startswith(
+                    _RECORD_INDEX_TENSOR_NAME_SIGNATURE_PREFIX
+                ):
+                    record_index_tensor_name = signature_name[
+                        len(_RECORD_INDEX_TENSOR_NAME_SIGNATURE_PREFIX) :
+                    ]
+                    assert (
+                        record_index_tensor_name
+                    ), "Invalid (empty) record_index_tensor_name"
+                    self._record_index_tensor_name = record_index_tensor_name
 
-    assert isinstance(self._decode_fun.structured_outputs, dict)
-    # Note that a loaded concrete function's structured_outputs are already
-    # TensorSpecs (instead of TensorAlikes).
-    self._output_type_specs = self._decode_fun.structured_outputs.copy()
+        assert isinstance(self._decode_fun.structured_outputs, dict)
+        # Note that a loaded concrete function's structured_outputs are already
+        # TensorSpecs (instead of TensorAlikes).
+        self._output_type_specs = self._decode_fun.structured_outputs.copy()
 
-  def decode_record(self, record: tf.Tensor) -> Dict[str, TensorAlike]:
-    return self._decode_fun(record)
+    def decode_record(self, record: tf.Tensor) -> Dict[str, TensorAlike]:
+        return self._decode_fun(record)
 
-  def output_type_specs(self) -> Dict[str, tf.TypeSpec]:
-    return self._output_type_specs
+    def output_type_specs(self) -> Dict[str, tf.TypeSpec]:
+        return self._output_type_specs
 
-  @property
-  def record_index_tensor_name(self) -> Optional[str]:
-    return self._record_index_tensor_name
+    @property
+    def record_index_tensor_name(self) -> Optional[str]:
+        return self._record_index_tensor_name
 
 
 def save_decoder(decoder: TFGraphRecordDecoder, path: str) -> None:
-  """Saves a TFGraphRecordDecoder to a SavedModel."""
-  m = tf.Module()
-  m.decode_fun = decoder._make_concrete_decode_function()  # pylint:disable=protected-access
-
-  signatures = dict()
-  if decoder.record_index_tensor_name is not None:
-    assert decoder.record_index_tensor_name, (
-        "Invalid (empty) record_index_tensor_name")
-    assert decoder.record_index_tensor_name in decoder.output_type_specs(), (
-        "Invalid decoder: record_index_tensor_name: {} not in output "
-        "tensors: {}".format(decoder.record_index_tensor_name,
-                             decoder.output_type_specs().keys()))
-
-    @tf.function(input_signature=[])
-    def record_index_tensor_name_fun():
-      return decoder.record_index_tensor_name
-    # We also encode the record index tensor name in the name of a signature.
-    # This way, we do not need to evaluate a tensor or a TF Function in order
-    # to know the name when loading a decoder back.
-    signatures = {
-        "%s%s" % (_RECORD_INDEX_TENSOR_NAME_SIGNATURE_PREFIX,
-                  decoder.record_index_tensor_name):
-            record_index_tensor_name_fun.get_concrete_function()
-    }
-
-  tf.saved_model.save(m, path, signatures=signatures)
+    """Saves a TFGraphRecordDecoder to a SavedModel."""
+    m = tf.Module()
+    m.decode_fun = decoder._make_concrete_decode_function()  # pylint:disable=protected-access
+
+    signatures = dict()
+    if decoder.record_index_tensor_name is not None:
+        assert (
+            decoder.record_index_tensor_name
+        ), "Invalid (empty) record_index_tensor_name"
+        assert decoder.record_index_tensor_name in decoder.output_type_specs(), (
+            f"Invalid decoder: record_index_tensor_name: {decoder.record_index_tensor_name} not in output "
+            f"tensors: {decoder.output_type_specs().keys()}"
+        )
+
+        @tf.function(input_signature=[])
+        def record_index_tensor_name_fun():
+            return decoder.record_index_tensor_name
+
+        # We also encode the record index tensor name in the name of a signature.
+        # This way, we do not need to evaluate a tensor or a TF Function in order
+        # to know the name when loading a decoder back.
+        signatures = {
+            "%s%s"
+            % (
+                _RECORD_INDEX_TENSOR_NAME_SIGNATURE_PREFIX,
+                decoder.record_index_tensor_name,
+            ): record_index_tensor_name_fun.get_concrete_function()
+        }
+
+    tf.saved_model.save(m, path, signatures=signatures)
 
 
 def load_decoder(path: str) -> LoadedDecoder:
-  """Loads a TFGraphRecordDecoder from a SavedModel."""
-  loaded_module = tf.saved_model.load(path)
-  assert hasattr(loaded_module, "decode_fun"), (
-      "the SavedModel is not a TFGraphRecordDecoder")
-  return LoadedDecoder(loaded_module)
+    """Loads a TFGraphRecordDecoder from a SavedModel."""
+    loaded_module = tf.saved_model.load(path)
+    assert hasattr(
+        loaded_module, "decode_fun"
+    ), "the SavedModel is not a TFGraphRecordDecoder"
+    return LoadedDecoder(loaded_module)
diff --git a/tfx_bsl/coders/tf_graph_record_decoder_test.py b/tfx_bsl/coders/tf_graph_record_decoder_test.py
index 82250378..fdc39852 100644
--- a/tfx_bsl/coders/tf_graph_record_decoder_test.py
+++ b/tfx_bsl/coders/tf_graph_record_decoder_test.py
@@ -13,154 +13,159 @@
 # limitations under the License.
 """Tests for tfx_bsl.coders.tf_graph_record_decoder."""
 
-import pytest
 import os
 import tempfile
 
-from absl import flags
+import pytest
 import tensorflow as tf
-from tfx_bsl.coders import tf_graph_record_decoder
+from absl import flags
 
+from tfx_bsl.coders import tf_graph_record_decoder
 
 FLAGS = flags.FLAGS
 
 
 class _DecoderForTesting(tf_graph_record_decoder.TFGraphRecordDecoder):
-
-  def decode_record(self, record):
-    indices = tf.transpose(tf.stack([
-        tf.range(tf.size(record), dtype=tf.int64),
-        tf.zeros(tf.size(record), dtype=tf.int64)
-    ]))
-    sparse = tf.SparseTensor(
-                values=record,
-                indices=indices,
-                dense_shape=[tf.size(record), 1])
-    return {
-        "sparse_tensor": sparse,
-        "ragged_tensor": tf.RaggedTensor.from_sparse(sparse),
-        "record_index": tf.RaggedTensor.from_row_splits(
-            values=tf.range(tf.size(record), dtype=tf.int64),
-            row_splits=tf.range(tf.size(record) + 1, dtype=tf.int64)),
-        "dense_tensor": record,
-    }
+    def decode_record(self, record):
+        indices = tf.transpose(
+            tf.stack(
+                [
+                    tf.range(tf.size(record), dtype=tf.int64),
+                    tf.zeros(tf.size(record), dtype=tf.int64),
+                ]
+            )
+        )
+        sparse = tf.SparseTensor(
+            values=record, indices=indices, dense_shape=[tf.size(record), 1]
+        )
+        return {
+            "sparse_tensor": sparse,
+            "ragged_tensor": tf.RaggedTensor.from_sparse(sparse),
+            "record_index": tf.RaggedTensor.from_row_splits(
+                values=tf.range(tf.size(record), dtype=tf.int64),
+                row_splits=tf.range(tf.size(record) + 1, dtype=tf.int64),
+            ),
+            "dense_tensor": record,
+        }
 
 
 class _DecoderForTestWithRecordIndexTensorName(_DecoderForTesting):
-
-  @property
-  def record_index_tensor_name(self):
-    return "record_index"
+    @property
+    def record_index_tensor_name(self):
+        return "record_index"
 
 
 class _DecoderForTestWithInvalidRecordIndexTensorName(_DecoderForTesting):
-
-  @property
-  def record_index_tensor_name(self):
-    return "does_not_exist"
+    @property
+    def record_index_tensor_name(self):
+        return "does_not_exist"
 
 
 class TfGraphRecordDecoderTest(tf.test.TestCase):
-
-  def setUp(self):
-    super().setUp()
-    self._tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
-
-  def _assert_type_specs_equal(self, lhs, rhs):
-    self.assertLen(lhs, len(rhs))
-    for k, spec in lhs.items():
-      self.assertIn(k, rhs)
-      # special handling for tf.TensorSpec to ignore the difference in .name.
-      if isinstance(spec, tf.TensorSpec):
-        self.assertIsInstance(rhs[k], tf.TensorSpec)
-        self.assertEqual(spec.shape.as_list(), rhs[k].shape.as_list())
-        self.assertEqual(spec.dtype, rhs[k].dtype)
-        continue
-      self.assertEqual(spec, rhs[k])
-
-  @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-  def test_save_load_decode(self):
-    decoder = _DecoderForTestWithRecordIndexTensorName()
-    actual_type_specs = decoder.output_type_specs()
-    actual_sparse_tensor_spec = actual_type_specs.pop("sparse_tensor")
-    # The expected shape is [None, 1], but due to a TensorFlow bug, it could
-    # be [None, None] in older TF versions.
-    self.assertTrue(actual_sparse_tensor_spec ==
-                    tf.SparseTensorSpec(shape=[None, None], dtype=tf.string) or
-                    actual_sparse_tensor_spec == tf.SparseTensorSpec(
-                        shape=[None, 1], dtype=tf.string))
-    self.assertEqual(
-        actual_type_specs, {
-            "ragged_tensor":
-                tf.RaggedTensorSpec(
-                    shape=[None, None], dtype=tf.string, ragged_rank=1),
-            "record_index":
-                tf.RaggedTensorSpec(
-                    shape=[None, None], dtype=tf.int64, ragged_rank=1),
-            "dense_tensor":
-                tf.TensorSpec(shape=[None], dtype=tf.string)
-        })
-    self.assertEqual(decoder.record_index_tensor_name, "record_index")
-    tf_graph_record_decoder.save_decoder(decoder, self._tmp_dir)
-    loaded = tf_graph_record_decoder.load_decoder(self._tmp_dir)
-    self.assertEqual(loaded.record_index_tensor_name, "record_index")
-
-    self._assert_type_specs_equal(decoder.output_type_specs(),
-                                  loaded.output_type_specs())
-
-    records = [b"abc", b"def"]
-    got = loaded.decode_record(records)
-    self.assertLen(got, len(loaded.output_type_specs()))
-    self.assertIn("sparse_tensor", got)
-    st = got["sparse_tensor"]
-    self.assertAllEqual(st.values, records)
-    self.assertAllEqual(st.indices, [[0, 0], [1, 0]])
-    self.assertAllEqual(st.dense_shape, [2, 1])
-
-    rt = got["ragged_tensor"]
-    self.assertAllEqual(rt, tf.ragged.constant([[b"abc"], [b"def"]]))
-
-    rt = got["record_index"]
-    self.assertAllEqual(rt, tf.ragged.constant([[0], [1]]))
-
-    dt = got["dense_tensor"]
-    self.assertAllEqual(dt, records)
-
-    # Also test that .record_index_tensor_name can be accessed in graph
-    # mode.
-    with tf.compat.v1.Graph().as_default():
-      self.assertFalse(tf.executing_eagerly())
-      loaded = tf_graph_record_decoder.load_decoder(self._tmp_dir)
-      self.assertEqual(loaded.record_index_tensor_name, "record_index")
-
-    # Also test that the decoder's class method `save_decoder` works.
-    new_decoder_path = (os.path.join(self._tmp_dir, "decoder_2"))
-    decoder.save(new_decoder_path)
-    loaded = tf_graph_record_decoder.load_decoder(new_decoder_path)
-    self.assertEqual(loaded.record_index_tensor_name, "record_index")
-
-  @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-  def test_no_record_index_tensor_name(self):
-    decoder = _DecoderForTesting()
-    self.assertIsNone(decoder.record_index_tensor_name)
-
-    tf_graph_record_decoder.save_decoder(decoder, self._tmp_dir)
-    loaded = tf_graph_record_decoder.load_decoder(self._tmp_dir)
-    self._assert_type_specs_equal(decoder.output_type_specs(),
-                                  loaded.output_type_specs())
-    self.assertIsNone(loaded.record_index_tensor_name)
-
-    with tf.compat.v1.Graph().as_default():
-      self.assertFalse(tf.executing_eagerly())
-      loaded = tf_graph_record_decoder.load_decoder(self._tmp_dir)
-      self.assertIsNone(loaded.record_index_tensor_name)
-
-  @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-  def test_do_not_save_if_record_index_tensor_name_invalid(self):
-    decoder = _DecoderForTestWithInvalidRecordIndexTensorName()
-    with self.assertRaisesRegex(AssertionError, "record_index_tensor_name"):
-      tf_graph_record_decoder.save_decoder(decoder, self._tmp_dir)
+    def setUp(self):
+        super().setUp()
+        self._tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
+
+    def _assert_type_specs_equal(self, lhs, rhs):
+        self.assertLen(lhs, len(rhs))
+        for k, spec in lhs.items():
+            self.assertIn(k, rhs)
+            # special handling for tf.TensorSpec to ignore the difference in .name.
+            if isinstance(spec, tf.TensorSpec):
+                self.assertIsInstance(rhs[k], tf.TensorSpec)
+                self.assertEqual(spec.shape.as_list(), rhs[k].shape.as_list())
+                self.assertEqual(spec.dtype, rhs[k].dtype)
+                continue
+            self.assertEqual(spec, rhs[k])
+
+    @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
+    def test_save_load_decode(self):
+        decoder = _DecoderForTestWithRecordIndexTensorName()
+        actual_type_specs = decoder.output_type_specs()
+        actual_sparse_tensor_spec = actual_type_specs.pop("sparse_tensor")
+        # The expected shape is [None, 1], but due to a TensorFlow bug, it could
+        # be [None, None] in older TF versions.
+        self.assertTrue(
+            actual_sparse_tensor_spec
+            == tf.SparseTensorSpec(shape=[None, None], dtype=tf.string)
+            or actual_sparse_tensor_spec
+            == tf.SparseTensorSpec(shape=[None, 1], dtype=tf.string)
+        )
+        self.assertEqual(
+            actual_type_specs,
+            {
+                "ragged_tensor": tf.RaggedTensorSpec(
+                    shape=[None, None], dtype=tf.string, ragged_rank=1
+                ),
+                "record_index": tf.RaggedTensorSpec(
+                    shape=[None, None], dtype=tf.int64, ragged_rank=1
+                ),
+                "dense_tensor": tf.TensorSpec(shape=[None], dtype=tf.string),
+            },
+        )
+        self.assertEqual(decoder.record_index_tensor_name, "record_index")
+        tf_graph_record_decoder.save_decoder(decoder, self._tmp_dir)
+        loaded = tf_graph_record_decoder.load_decoder(self._tmp_dir)
+        self.assertEqual(loaded.record_index_tensor_name, "record_index")
+
+        self._assert_type_specs_equal(
+            decoder.output_type_specs(), loaded.output_type_specs()
+        )
+
+        records = [b"abc", b"def"]
+        got = loaded.decode_record(records)
+        self.assertLen(got, len(loaded.output_type_specs()))
+        self.assertIn("sparse_tensor", got)
+        st = got["sparse_tensor"]
+        self.assertAllEqual(st.values, records)
+        self.assertAllEqual(st.indices, [[0, 0], [1, 0]])
+        self.assertAllEqual(st.dense_shape, [2, 1])
+
+        rt = got["ragged_tensor"]
+        self.assertAllEqual(rt, tf.ragged.constant([[b"abc"], [b"def"]]))
+
+        rt = got["record_index"]
+        self.assertAllEqual(rt, tf.ragged.constant([[0], [1]]))
+
+        dt = got["dense_tensor"]
+        self.assertAllEqual(dt, records)
+
+        # Also test that .record_index_tensor_name can be accessed in graph
+        # mode.
+        with tf.compat.v1.Graph().as_default():
+            self.assertFalse(tf.executing_eagerly())
+            loaded = tf_graph_record_decoder.load_decoder(self._tmp_dir)
+            self.assertEqual(loaded.record_index_tensor_name, "record_index")
+
+        # Also test that the decoder's class method `save_decoder` works.
+        new_decoder_path = os.path.join(self._tmp_dir, "decoder_2")
+        decoder.save(new_decoder_path)
+        loaded = tf_graph_record_decoder.load_decoder(new_decoder_path)
+        self.assertEqual(loaded.record_index_tensor_name, "record_index")
+
+    @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
+    def test_no_record_index_tensor_name(self):
+        decoder = _DecoderForTesting()
+        self.assertIsNone(decoder.record_index_tensor_name)
+
+        tf_graph_record_decoder.save_decoder(decoder, self._tmp_dir)
+        loaded = tf_graph_record_decoder.load_decoder(self._tmp_dir)
+        self._assert_type_specs_equal(
+            decoder.output_type_specs(), loaded.output_type_specs()
+        )
+        self.assertIsNone(loaded.record_index_tensor_name)
+
+        with tf.compat.v1.Graph().as_default():
+            self.assertFalse(tf.executing_eagerly())
+            loaded = tf_graph_record_decoder.load_decoder(self._tmp_dir)
+            self.assertIsNone(loaded.record_index_tensor_name)
+
+    @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
+    def test_do_not_save_if_record_index_tensor_name_invalid(self):
+        decoder = _DecoderForTestWithInvalidRecordIndexTensorName()
+        with self.assertRaisesRegex(AssertionError, "record_index_tensor_name"):
+            tf_graph_record_decoder.save_decoder(decoder, self._tmp_dir)
 
 
 if __name__ == "__main__":
-  tf.test.main()
+    tf.test.main()
diff --git a/tfx_bsl/docs/schema_interpretation.md b/tfx_bsl/docs/schema_interpretation.md
index 82c2acd3..192b9048 100644
--- a/tfx_bsl/docs/schema_interpretation.md
+++ b/tfx_bsl/docs/schema_interpretation.md
@@ -43,7 +43,7 @@ give some examples of advanced usage.
 
 ### Primitive types
 
-<!-- BEGIN GOOGLE-INTERNAL 
+<!-- BEGIN GOOGLE-INTERNAL
 
 Schema interpretation logic has two versions controlled by
 [`generate_legacy_feature_spec`](https://github.com/tensorflow/metadata/blob/master/tensorflow_metadata/proto/v0/schema.proto#L115):
@@ -64,7 +64,7 @@ Schema interpretation logic has two versions controlled by
 1.  Otherwise, a `tf.io.VarLenFeature` of the corresponding type will be
     produced.
 
-<!-- BEGIN GOOGLE-INTERNAL 
+<!-- BEGIN GOOGLE-INTERNAL
 
 </section>
 
diff --git a/tfx_bsl/public/beam/run_inference.py b/tfx_bsl/public/beam/run_inference.py
index ca82a17e..02f281b4 100644
--- a/tfx_bsl/public/beam/run_inference.py
+++ b/tfx_bsl/public/beam/run_inference.py
@@ -17,16 +17,14 @@
 
 import apache_beam as beam
 from apache_beam.ml.inference.base import ModelHandler
-from tfx_bsl.beam import run_inference
-from tfx_bsl.public.proto import model_spec_pb2
-
 from tensorflow_serving.apis import prediction_log_pb2
 
+from tfx_bsl.beam import run_inference
+from tfx_bsl.public.proto import model_spec_pb2
 
-_K = TypeVar('_K')
+_K = TypeVar("_K")
 _KeyedBatchesInput = Tuple[_K, List[run_inference.InputType]]
-_MaybeKeyedInput = Union[run_inference.InputType,
-                         Tuple[_K, run_inference.InputType]]
+_MaybeKeyedInput = Union[run_inference.InputType, Tuple[_K, run_inference.InputType]]
 _OUTPUT_TYPE = prediction_log_pb2.PredictionLog
 
 
@@ -45,130 +43,138 @@
 def RunInference(
     examples: beam.pvalue.PCollection,
     inference_spec_type: model_spec_pb2.InferenceSpecType,
-    load_override_fn: Optional[run_inference.LoadOverrideFnType] = None
+    load_override_fn: Optional[run_inference.LoadOverrideFnType] = None,
 ) -> beam.pvalue.PCollection:
-  """Run inference with a model.
-
-  There are two types of inference you can perform using this PTransform:
-    1. In-process inference from a SavedModel instance. Used when
-      `saved_model_spec` field is set in `inference_spec_type`.
-    2. Remote inference by using a service endpoint. Used when
-      `ai_platform_prediction_model_spec` field is set in
-      `inference_spec_type`.
-
-  Args:
-    examples: A PCollection containing examples of the following possible kinds,
-      each with their corresponding return type.
-        - PCollection[Example]                   -> PCollection[PredictionLog]
-            * Works with Classify, Regress, MultiInference, Predict and
-              RemotePredict.
-
-        - PCollection[SequenceExample]           -> PCollection[PredictionLog]
-            * Works with Predict and (serialized) RemotePredict.
-
-        - PCollection[bytes]                     -> PCollection[PredictionLog]
-            * For serialized Example: Works with Classify, Regress,
-              MultiInference, Predict and RemotePredict.
-            * For everything else: Works with Predict and RemotePredict.
-
-        - PCollection[Tuple[K, Example]]         -> PCollection[
-                                                        Tuple[K, PredictionLog]]
-            * Works with Classify, Regress, MultiInference, Predict and
-              RemotePredict.
-
-        - PCollection[Tuple[K, SequenceExample]] -> PCollection[
-                                                        Tuple[K, PredictionLog]]
-            * Works with Predict and (serialized) RemotePredict.
-
-        - PCollection[Tuple[K, bytes]]           -> PCollection[
-                                                        Tuple[K, PredictionLog]]
-            * For serialized Example: Works with Classify, Regress,
-              MultiInference, Predict and RemotePredict.
-            * For everything else: Works with Predict and RemotePredict.
-
-    inference_spec_type: Model inference endpoint.
-    load_override_fn: Optional function taking a model path and sequence of
-      tags, and returning a tf SavedModel. The loaded model must be equivalent
-      in interface to the model that would otherwise be loaded. It is up to the
-      caller to ensure compatibility. This argument is experimental and subject
-      to change.
-
-  Returns:
-    A PCollection (possibly keyed) containing prediction logs.
-  """
-  return (examples
-          | 'RunInferenceImpl' >> run_inference.RunInferenceImpl(
-              inference_spec_type, load_override_fn))
+    """Run inference with a model.
+
+    There are two types of inference you can perform using this PTransform:
+      1. In-process inference from a SavedModel instance. Used when
+        `saved_model_spec` field is set in `inference_spec_type`.
+      2. Remote inference by using a service endpoint. Used when
+        `ai_platform_prediction_model_spec` field is set in
+        `inference_spec_type`.
+
+    Args:
+    ----
+      examples: A PCollection containing examples of the following possible kinds,
+        each with their corresponding return type.
+          - PCollection[Example]                   -> PCollection[PredictionLog]
+              * Works with Classify, Regress, MultiInference, Predict and
+                RemotePredict.
+
+          - PCollection[SequenceExample]           -> PCollection[PredictionLog]
+              * Works with Predict and (serialized) RemotePredict.
+
+          - PCollection[bytes]                     -> PCollection[PredictionLog]
+              * For serialized Example: Works with Classify, Regress,
+                MultiInference, Predict and RemotePredict.
+              * For everything else: Works with Predict and RemotePredict.
+
+          - PCollection[Tuple[K, Example]]         -> PCollection[
+                                                          Tuple[K, PredictionLog]]
+              * Works with Classify, Regress, MultiInference, Predict and
+                RemotePredict.
+
+          - PCollection[Tuple[K, SequenceExample]] -> PCollection[
+                                                          Tuple[K, PredictionLog]]
+              * Works with Predict and (serialized) RemotePredict.
+
+          - PCollection[Tuple[K, bytes]]           -> PCollection[
+                                                          Tuple[K, PredictionLog]]
+              * For serialized Example: Works with Classify, Regress,
+                MultiInference, Predict and RemotePredict.
+              * For everything else: Works with Predict and RemotePredict.
+
+      inference_spec_type: Model inference endpoint.
+      load_override_fn: Optional function taking a model path and sequence of
+        tags, and returning a tf SavedModel. The loaded model must be equivalent
+        in interface to the model that would otherwise be loaded. It is up to the
+        caller to ensure compatibility. This argument is experimental and subject
+        to change.
+
+    Returns:
+    -------
+      A PCollection (possibly keyed) containing prediction logs.
+    """
+    return examples | "RunInferenceImpl" >> run_inference.RunInferenceImpl(
+        inference_spec_type, load_override_fn
+    )
 
 
 @beam.ptransform_fn
 @beam.typehints.with_input_types(_MaybeKeyedInput)
-@beam.typehints.with_output_types(Union[Tuple[_OUTPUT_TYPE, ...],
-                                        Tuple[_K, Tuple[_OUTPUT_TYPE, ...]]])
+@beam.typehints.with_output_types(
+    Union[Tuple[_OUTPUT_TYPE, ...], Tuple[_K, Tuple[_OUTPUT_TYPE, ...]]]
+)
 def RunInferencePerModel(
     examples: beam.pvalue.PCollection,
     inference_spec_types: Iterable[model_spec_pb2.InferenceSpecType],
-    load_override_fn: Optional[run_inference.LoadOverrideFnType] = None
+    load_override_fn: Optional[run_inference.LoadOverrideFnType] = None,
 ) -> beam.pvalue.PCollection:
-  """Vectorized variant of RunInference (useful for ensembles).
-
-  Args:
-    examples: A PCollection containing examples of the following possible kinds,
-      each with their corresponding return type.
-        - PCollection[Example]                  -> PCollection[
-                                                     Tuple[PredictionLog, ...]]
-            * Works with Classify, Regress, MultiInference, Predict and
-              RemotePredict.
-
-        - PCollection[SequenceExample]          -> PCollection[
-                                                     Tuple[PredictionLog, ...]]
-            * Works with Predict and (serialized) RemotePredict.
-
-        - PCollection[bytes]                    -> PCollection[
-                                                     Tuple[PredictionLog, ...]]
-            * For serialized Example: Works with Classify, Regress,
-              MultiInference, Predict and RemotePredict.
-            * For everything else: Works with Predict and RemotePredict.
-
-        - PCollection[Tuple[K, Example]]        -> PCollection[
-                                                     Tuple[K,
-                                                           Tuple[PredictionLog,
-                                                                 ...]]]
-            * Works with Classify, Regress, MultiInference, Predict and
-              RemotePredict.
-
-        - PCollection[Tuple[K, SequenceExample]] -> PCollection[
-                                                     Tuple[K,
-                                                           Tuple[PredictionLog,
-                                                                 ...]]]
-            * Works with Predict and (serialized) RemotePredict.
-
-        - PCollection[Tuple[K, bytes]]           -> PCollection[
-                                                     Tuple[K,
-                                                           Tuple[PredictionLog,
-                                                                 ...]]]
-            * For serialized Example: Works with Classify, Regress,
-              MultiInference, Predict and RemotePredict.
-            * For everything else: Works with Predict and RemotePredict.
-
-    inference_spec_types: A flat iterable of Model inference endpoints.
-      Inference will happen in a fused fashion (ie without data
-      materialization), sequentially across Models within a Beam thread (but
-      in parallel across threads and workers).
-    load_override_fn: Optional function taking a model path and sequence of
-      tags, and returning a tf SavedModel. The loaded model must be equivalent
-      in interface to the model that would otherwise be loaded. It is up to the
-      caller to ensure compatibility. This argument is experimental and subject
-      to change.
-
-  Returns:
-    A PCollection (possibly keyed) containing a Tuple of prediction logs. The
-    Tuple of prediction logs is 1-1 aligned with inference_spec_types.
-  """
-  return (examples
-          |
-          'RunInferencePerModelImpl' >> run_inference.RunInferencePerModelImpl(
-              inference_spec_types, load_override_fn))
+    """Vectorized variant of RunInference (useful for ensembles).
+
+    Args:
+    ----
+      examples: A PCollection containing examples of the following possible kinds,
+        each with their corresponding return type.
+          - PCollection[Example]                  -> PCollection[
+                                                       Tuple[PredictionLog, ...]]
+              * Works with Classify, Regress, MultiInference, Predict and
+                RemotePredict.
+
+          - PCollection[SequenceExample]          -> PCollection[
+                                                       Tuple[PredictionLog, ...]]
+              * Works with Predict and (serialized) RemotePredict.
+
+          - PCollection[bytes]                    -> PCollection[
+                                                       Tuple[PredictionLog, ...]]
+              * For serialized Example: Works with Classify, Regress,
+                MultiInference, Predict and RemotePredict.
+              * For everything else: Works with Predict and RemotePredict.
+
+          - PCollection[Tuple[K, Example]]        -> PCollection[
+                                                       Tuple[K,
+                                                             Tuple[PredictionLog,
+                                                                   ...]]]
+              * Works with Classify, Regress, MultiInference, Predict and
+                RemotePredict.
+
+          - PCollection[Tuple[K, SequenceExample]] -> PCollection[
+                                                       Tuple[K,
+                                                             Tuple[PredictionLog,
+                                                                   ...]]]
+              * Works with Predict and (serialized) RemotePredict.
+
+          - PCollection[Tuple[K, bytes]]           -> PCollection[
+                                                       Tuple[K,
+                                                             Tuple[PredictionLog,
+                                                                   ...]]]
+              * For serialized Example: Works with Classify, Regress,
+                MultiInference, Predict and RemotePredict.
+              * For everything else: Works with Predict and RemotePredict.
+
+      inference_spec_types: A flat iterable of Model inference endpoints.
+        Inference will happen in a fused fashion (ie without data
+        materialization), sequentially across Models within a Beam thread (but
+        in parallel across threads and workers).
+      load_override_fn: Optional function taking a model path and sequence of
+        tags, and returning a tf SavedModel. The loaded model must be equivalent
+        in interface to the model that would otherwise be loaded. It is up to the
+        caller to ensure compatibility. This argument is experimental and subject
+        to change.
+
+    Returns:
+    -------
+      A PCollection (possibly keyed) containing a Tuple of prediction logs. The
+      Tuple of prediction logs is 1-1 aligned with inference_spec_types.
+    """
+    return (
+        examples
+        | "RunInferencePerModelImpl"
+        >> run_inference.RunInferencePerModelImpl(
+            inference_spec_types, load_override_fn
+        )
+    )
 
 
 @beam.ptransform_fn
@@ -177,36 +183,40 @@ def RunInferencePerModel(
 def RunInferenceOnKeyedBatches(
     examples: beam.pvalue.PCollection,
     inference_spec_type: model_spec_pb2.InferenceSpecType,
-    load_override_fn: Optional[run_inference.LoadOverrideFnType] = None
+    load_override_fn: Optional[run_inference.LoadOverrideFnType] = None,
 ) -> beam.pvalue.PCollection:
-  """Run inference over pre-batched keyed inputs.
-
-  This API is experimental and may change in the future.
-
-  Supports the same inference specs as RunInference. Inputs must consist of a
-  keyed list of examples, and outputs consist of keyed list of prediction logs
-  corresponding by index.
-
-  Args:
-    examples: A PCollection of keyed, batched inputs of type Example,
-      SequenceExample, or bytes. Each type support inference specs corresponding
-      to the unbatched cases described in RunInference. Supports
-        - PCollection[Tuple[K, List[Example]]]
-        - PCollection[Tuple[K, List[SequenceExample]]]
-        - PCollection[Tuple[K, List[Bytes]]]
-    inference_spec_type: Model inference endpoint.
-    load_override_fn: Optional function taking a model path and sequence of
-      tags, and returning a tf SavedModel. The loaded model must be equivalent
-      in interface to the model that would otherwise be loaded. It is up to the
-      caller to ensure compatibility. This argument is experimental and subject
-      to change.
-
-  Returns:
-    A PCollection of Tuple[K, List[PredictionLog]].
-  """
-  return (examples
-          | 'RunInferenceOnKeyedBatchesImpl' >> run_inference.RunInferenceImpl(
-              inference_spec_type, load_override_fn))
+    """Run inference over pre-batched keyed inputs.
+
+    This API is experimental and may change in the future.
+
+    Supports the same inference specs as RunInference. Inputs must consist of a
+    keyed list of examples, and outputs consist of keyed list of prediction logs
+    corresponding by index.
+
+    Args:
+    ----
+      examples: A PCollection of keyed, batched inputs of type Example,
+        SequenceExample, or bytes. Each type support inference specs corresponding
+        to the unbatched cases described in RunInference. Supports
+          - PCollection[Tuple[K, List[Example]]]
+          - PCollection[Tuple[K, List[SequenceExample]]]
+          - PCollection[Tuple[K, List[Bytes]]]
+      inference_spec_type: Model inference endpoint.
+      load_override_fn: Optional function taking a model path and sequence of
+        tags, and returning a tf SavedModel. The loaded model must be equivalent
+        in interface to the model that would otherwise be loaded. It is up to the
+        caller to ensure compatibility. This argument is experimental and subject
+        to change.
+
+    Returns:
+    -------
+      A PCollection of Tuple[K, List[PredictionLog]].
+    """
+    return (
+        examples
+        | "RunInferenceOnKeyedBatchesImpl"
+        >> run_inference.RunInferenceImpl(inference_spec_type, load_override_fn)
+    )
 
 
 @beam.ptransform_fn
@@ -215,73 +225,81 @@ def RunInferenceOnKeyedBatches(
 def RunInferencePerModelOnKeyedBatches(
     examples: beam.pvalue.PCollection,
     inference_spec_types: Iterable[model_spec_pb2.InferenceSpecType],
-    load_override_fn: Optional[run_inference.LoadOverrideFnType] = None
+    load_override_fn: Optional[run_inference.LoadOverrideFnType] = None,
 ) -> beam.pvalue.PCollection:
-  """Run inference over pre-batched keyed inputs on multiple models.
-
-  This API is experimental and may change in the future.
-
-  Supports the same inference specs as RunInferencePerModel. Inputs must consist
-  of a keyed list of examples, and outputs consist of keyed list of prediction
-  logs corresponding by index.
-
-  Args:
-    examples: A PCollection of keyed, batched inputs of type Example,
-      SequenceExample, or bytes. Each type support inference specs corresponding
-      to the unbatched cases described in RunInferencePerModel. Supports -
-      PCollection[Tuple[K, List[Example]]] - PCollection[Tuple[K,
-      List[SequenceExample]]] - PCollection[Tuple[K, List[Bytes]]]
-    inference_spec_types: A flat iterable of Model inference endpoints.
-      Inference will happen in a fused fashion (ie without data
-      materialization), sequentially across Models within a Beam thread (but in
-      parallel across threads and workers).
-    load_override_fn: Optional function taking a model path and sequence of
-      tags, and returning a tf SavedModel. The loaded model must be equivalent
-      in interface to the model that would otherwise be loaded. It is up to the
-      caller to ensure compatibility. This argument is experimental and subject
-      to change.
-
-  Returns:
-    A PCollection containing Tuples of a key and lists of batched prediction
-    logs from each model provided in inference_spec_types. The Tuple of batched
-    prediction logs is 1-1 aligned with inference_spec_types. The individual
-    prediction logs in the batch are 1-1 aligned with the rows of data in the
-    batch key.
-  """
-  return (examples
-          | 'RunInferencePerModelOnKeyedBatchesImpl' >>
-          run_inference.RunInferencePerModelImpl(inference_spec_types,
-                                                 load_override_fn))
+    """Run inference over pre-batched keyed inputs on multiple models.
+
+    This API is experimental and may change in the future.
+
+    Supports the same inference specs as RunInferencePerModel. Inputs must consist
+    of a keyed list of examples, and outputs consist of keyed list of prediction
+    logs corresponding by index.
+
+    Args:
+    ----
+      examples: A PCollection of keyed, batched inputs of type Example,
+        SequenceExample, or bytes. Each type support inference specs corresponding
+        to the unbatched cases described in RunInferencePerModel. Supports -
+        PCollection[Tuple[K, List[Example]]] - PCollection[Tuple[K,
+        List[SequenceExample]]] - PCollection[Tuple[K, List[Bytes]]]
+      inference_spec_types: A flat iterable of Model inference endpoints.
+        Inference will happen in a fused fashion (ie without data
+        materialization), sequentially across Models within a Beam thread (but in
+        parallel across threads and workers).
+      load_override_fn: Optional function taking a model path and sequence of
+        tags, and returning a tf SavedModel. The loaded model must be equivalent
+        in interface to the model that would otherwise be loaded. It is up to the
+        caller to ensure compatibility. This argument is experimental and subject
+        to change.
+
+    Returns:
+    -------
+      A PCollection containing Tuples of a key and lists of batched prediction
+      logs from each model provided in inference_spec_types. The Tuple of batched
+      prediction logs is 1-1 aligned with inference_spec_types. The individual
+      prediction logs in the batch are 1-1 aligned with the rows of data in the
+      batch key.
+    """
+    return (
+        examples
+        | "RunInferencePerModelOnKeyedBatchesImpl"
+        >> run_inference.RunInferencePerModelImpl(
+            inference_spec_types, load_override_fn
+        )
+    )
 
 
 def CreateModelHandler(
-    inference_spec_type: model_spec_pb2.InferenceSpecType) -> ModelHandler:
-  """Creates a Beam ModelHandler based on the inference spec type.
-
-  There are two model handlers:
-    1. In-process inference from a SavedModel instance. Used when
-      `saved_model_spec` field is set in `inference_spec_type`.
-    2. Remote inference by using a service endpoint. Used when
-      `ai_platform_prediction_model_spec` field is set in
-      `inference_spec_type`.
-
-  Example Usage:
-
-    ```
-    from apache_beam.ml.inference import base
-
-    tf_handler = CreateModelHandler(inference_spec_type)
-    # unkeyed
-    base.RunInference(tf_handler)
-
-    # keyed
-    base.RunInference(base.KeyedModelHandler(tf_handler))
-    ```
-
-  Args:
-    inference_spec_type: Model inference endpoint.
-
-  Returns:
-    A Beam RunInference ModelHandler for TensorFlow
-  """
-  return run_inference.create_model_handler(inference_spec_type, None, None)
+    inference_spec_type: model_spec_pb2.InferenceSpecType,
+) -> ModelHandler:
+    """Creates a Beam ModelHandler based on the inference spec type.
+
+    There are two model handlers:
+      1. In-process inference from a SavedModel instance. Used when
+        `saved_model_spec` field is set in `inference_spec_type`.
+      2. Remote inference by using a service endpoint. Used when
+        `ai_platform_prediction_model_spec` field is set in
+        `inference_spec_type`.
+
+    Example Usage:
+
+      ```
+      from apache_beam.ml.inference import base
+
+      tf_handler = CreateModelHandler(inference_spec_type)
+      # unkeyed
+      base.RunInference(tf_handler)
+
+      # keyed
+      base.RunInference(base.KeyedModelHandler(tf_handler))
+      ```
+
+    Args:
+    ----
+      inference_spec_type: Model inference endpoint.
+
+    Returns:
+    -------
+      A Beam RunInference ModelHandler for TensorFlow
+    """
+    return run_inference.create_model_handler(inference_spec_type, None, None)
diff --git a/tfx_bsl/public/tfxio/__init__.py b/tfx_bsl/public/tfxio/__init__.py
index 6ba3903c..490b13e0 100644
--- a/tfx_bsl/public/tfxio/__init__.py
+++ b/tfx_bsl/public/tfxio/__init__.py
@@ -21,15 +21,16 @@
 
 from tfx_bsl.coders.example_coder import RecordBatchToExamplesEncoder
 from tfx_bsl.coders.tf_graph_record_decoder import TFGraphRecordDecoder
-from tfx_bsl.tfxio.csv_tfxio import BeamRecordCsvTFXIO
-from tfx_bsl.tfxio.csv_tfxio import CsvTFXIO
-from tfx_bsl.tfxio.dataset_options import RecordBatchesOptions
-from tfx_bsl.tfxio.dataset_options import TensorFlowDatasetOptions
-from tfx_bsl.tfxio.tensor_adapter import TensorAdapter
-from tfx_bsl.tfxio.tensor_adapter import TensorAdapterConfig
-from tfx_bsl.tfxio.tensor_adapter import TensorRepresentations
-from tfx_bsl.tfxio.tf_example_record import TFExampleBeamRecord
-from tfx_bsl.tfxio.tf_example_record import TFExampleRecord
-from tfx_bsl.tfxio.tf_sequence_example_record import TFSequenceExampleBeamRecord
-from tfx_bsl.tfxio.tf_sequence_example_record import TFSequenceExampleRecord
+from tfx_bsl.tfxio.csv_tfxio import BeamRecordCsvTFXIO, CsvTFXIO
+from tfx_bsl.tfxio.dataset_options import RecordBatchesOptions, TensorFlowDatasetOptions
+from tfx_bsl.tfxio.tensor_adapter import (
+    TensorAdapter,
+    TensorAdapterConfig,
+    TensorRepresentations,
+)
+from tfx_bsl.tfxio.tf_example_record import TFExampleBeamRecord, TFExampleRecord
+from tfx_bsl.tfxio.tf_sequence_example_record import (
+    TFSequenceExampleBeamRecord,
+    TFSequenceExampleRecord,
+)
 from tfx_bsl.tfxio.tfxio import TFXIO
diff --git a/tfx_bsl/public/tfxio/tfxio_import_test.py b/tfx_bsl/public/tfxio/tfxio_import_test.py
index 7528cd12..37b333e6 100644
--- a/tfx_bsl/public/tfxio/tfxio_import_test.py
+++ b/tfx_bsl/public/tfxio/tfxio_import_test.py
@@ -17,23 +17,24 @@
 
 
 class TfxioImportTest(absltest.TestCase):
-
-  def test_import(self):
-    # pylint: disable=g-import-not-at-top,unused-import
-    from tfx_bsl.public.tfxio import RecordBatchToExamplesEncoder
-    from tfx_bsl.public.tfxio import BeamRecordCsvTFXIO
-    from tfx_bsl.public.tfxio import CsvTFXIO
-    from tfx_bsl.public.tfxio import TensorFlowDatasetOptions
-    from tfx_bsl.public.tfxio import TensorAdapter
-    from tfx_bsl.public.tfxio import TensorAdapterConfig
-    from tfx_bsl.public.tfxio import TensorRepresentations
-    from tfx_bsl.public.tfxio import TFExampleBeamRecord
-    from tfx_bsl.public.tfxio import TFExampleRecord
-    from tfx_bsl.public.tfxio import TFGraphRecordDecoder
-    from tfx_bsl.public.tfxio import TFSequenceExampleBeamRecord
-    from tfx_bsl.public.tfxio import TFSequenceExampleRecord
-    from tfx_bsl.public.tfxio import TFXIO
+    def test_import(self):
+        # pylint: disable=g-import-not-at-top,unused-import
+        from tfx_bsl.public.tfxio import (
+            TFXIO,
+            BeamRecordCsvTFXIO,
+            CsvTFXIO,
+            RecordBatchToExamplesEncoder,
+            TensorAdapter,
+            TensorAdapterConfig,
+            TensorFlowDatasetOptions,
+            TensorRepresentations,
+            TFExampleBeamRecord,
+            TFExampleRecord,
+            TFGraphRecordDecoder,
+            TFSequenceExampleBeamRecord,
+            TFSequenceExampleRecord,
+        )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/sketches/__init__.py b/tfx_bsl/sketches/__init__.py
index f2a095e6..cdf3efc9 100644
--- a/tfx_bsl/sketches/__init__.py
+++ b/tfx_bsl/sketches/__init__.py
@@ -13,19 +13,24 @@
 # limitations under the License.
 
 """Module level imports for tfx_bsl.sketches."""
+
 # pylint: disable=unused-import
 # pytype: disable=import-error
 # pylint: disable=g-import-not-at-top
 # See b/148667210 for why the ImportError is ignored.
 try:
-  from tfx_bsl.cc.tfx_bsl_extension.sketches import KmvSketch
-  from tfx_bsl.cc.tfx_bsl_extension.sketches import MisraGriesSketch
-  from tfx_bsl.cc.tfx_bsl_extension.sketches import QuantilesSketch
+    from tfx_bsl.cc.tfx_bsl_extension.sketches import (
+        KmvSketch,
+        MisraGriesSketch,
+        QuantilesSketch,
+    )
 except ImportError as err:
-  import sys
-  sys.stderr.write("Error importing tfx_bsl_extension.sketches. "
-                   "Some tfx_bsl functionalities are not available: {}"
-                   .format(err))
+    import sys
+
+    sys.stderr.write(
+        "Error importing tfx_bsl_extension.sketches. "
+        f"Some tfx_bsl functionalities are not available: {err}"
+    )
 
 # pylint: enable=g-import-not-at-top
 # pytype: enable=import-error
diff --git a/tfx_bsl/sketches/kmv_sketch_test.py b/tfx_bsl/sketches/kmv_sketch_test.py
index 98f600a6..f11b42ec 100644
--- a/tfx_bsl/sketches/kmv_sketch_test.py
+++ b/tfx_bsl/sketches/kmv_sketch_test.py
@@ -16,90 +16,89 @@
 import pickle
 
 import pyarrow as pa
-from tfx_bsl import sketches
+from absl.testing import absltest, parameterized
 
-from absl.testing import absltest
-from absl.testing import parameterized
+from tfx_bsl import sketches
 
 _NUM_BUCKETS = 128
 
 
 def _create_basic_sketch(values, num_buckets=_NUM_BUCKETS):
-  sketch = sketches.KmvSketch(num_buckets)
-  sketch.AddValues(values)
-  return sketch
+    sketch = sketches.KmvSketch(num_buckets)
+    sketch.AddValues(values)
+    return sketch
 
 
 class KmvSketchTest(parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      ("binary", [b"a", b"a", b"b", b"c", None], pa.binary()),
-      ("large_binary", [b"a", b"a", b"b", b"c"], pa.large_binary()),
-      ("string", ["a", "a", "b", "c", None], pa.string()),
-      ("large_string", ["a", "a", "b", "c"], pa.large_string()),
-      ("int8", [1, 1, 2, 3, None], pa.int8()),
-      ("int16", [1, 1, 2, 3], pa.int16()),
-      ("int32", [1, 1, 2, 3, None], pa.int32()),
-      ("int64", [1, 1, 2, 3], pa.int64()),
-      ("uint8", [1, 1, 2, 3], pa.uint8()),
-      ("uint16", [1, None, 1, 2, 3], pa.uint16()),
-      ("uint32", [1, 1, 2, 3], pa.uint32()),
-      ("uint64", [1, 1, 2, 3, None], pa.uint64()),
-  )
-  def test_add(self, values, type_):
-    sketch = _create_basic_sketch(pa.array(values, type=type_))
-    num_unique = sketch.Estimate()
-
-    self.assertEqual(3, num_unique)
-
-  def test_add_unsupported_type(self):
-    values = pa.array([True, False], pa.bool_())
-    sketch = sketches.KmvSketch(_NUM_BUCKETS)
-    with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED: bool"):
-      sketch.AddValues(values)
-
-  def test_merge(self):
-    sketch1 = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
-    sketch2 = _create_basic_sketch(pa.array(["d", "a"]))
-
-    sketch1.Merge(sketch2)
-    num_unique = sketch1.Estimate()
-
-    self.assertEqual(4, num_unique)
-
-  def test_merge_error(self):
-    sketch1 = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
-    sketch2 = _create_basic_sketch(pa.array(["d", "a"]), num_buckets=64)
-    with self.assertRaisesRegex(
-        Exception, "Both sketches must have the same number of buckets"):
-      sketch1.Merge(sketch2)
-
-  def test_picklable(self):
-    sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
-    pickled = pickle.dumps(sketch, 2)
-    self.assertIsInstance(pickled, bytes)
-    unpickled = pickle.loads(pickled)
-    self.assertIsInstance(unpickled, sketches.KmvSketch)
-
-    num_unique = unpickled.Estimate()
-    self.assertEqual(3, num_unique)
-
-  def test_serialization(self):
-    sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
-
-    serialized = sketch.Serialize()
-    self.assertIsInstance(serialized, bytes)
-
-    deserialized = sketches.KmvSketch.Deserialize(serialized)
-    self.assertIsInstance(deserialized, sketches.KmvSketch)
-
-    num_unique = deserialized.Estimate()
-    self.assertEqual(3, num_unique)
-
-  def test_deserialize_fails_with_exception(self):
-    with self.assertRaisesRegex(RuntimeError, "Failed to parse Kmv sketch"):
-      sketches.KmvSketch.Deserialize("I am no proto")
+    @parameterized.named_parameters(
+        ("binary", [b"a", b"a", b"b", b"c", None], pa.binary()),
+        ("large_binary", [b"a", b"a", b"b", b"c"], pa.large_binary()),
+        ("string", ["a", "a", "b", "c", None], pa.string()),
+        ("large_string", ["a", "a", "b", "c"], pa.large_string()),
+        ("int8", [1, 1, 2, 3, None], pa.int8()),
+        ("int16", [1, 1, 2, 3], pa.int16()),
+        ("int32", [1, 1, 2, 3, None], pa.int32()),
+        ("int64", [1, 1, 2, 3], pa.int64()),
+        ("uint8", [1, 1, 2, 3], pa.uint8()),
+        ("uint16", [1, None, 1, 2, 3], pa.uint16()),
+        ("uint32", [1, 1, 2, 3], pa.uint32()),
+        ("uint64", [1, 1, 2, 3, None], pa.uint64()),
+    )
+    def test_add(self, values, type_):
+        sketch = _create_basic_sketch(pa.array(values, type=type_))
+        num_unique = sketch.Estimate()
+
+        self.assertEqual(3, num_unique)
+
+    def test_add_unsupported_type(self):
+        values = pa.array([True, False], pa.bool_())
+        sketch = sketches.KmvSketch(_NUM_BUCKETS)
+        with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED: bool"):
+            sketch.AddValues(values)
+
+    def test_merge(self):
+        sketch1 = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
+        sketch2 = _create_basic_sketch(pa.array(["d", "a"]))
+
+        sketch1.Merge(sketch2)
+        num_unique = sketch1.Estimate()
+
+        self.assertEqual(4, num_unique)
+
+    def test_merge_error(self):
+        sketch1 = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
+        sketch2 = _create_basic_sketch(pa.array(["d", "a"]), num_buckets=64)
+        with self.assertRaisesRegex(
+            Exception, "Both sketches must have the same number of buckets"
+        ):
+            sketch1.Merge(sketch2)
+
+    def test_picklable(self):
+        sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
+        pickled = pickle.dumps(sketch, 2)
+        self.assertIsInstance(pickled, bytes)
+        unpickled = pickle.loads(pickled)
+        self.assertIsInstance(unpickled, sketches.KmvSketch)
+
+        num_unique = unpickled.Estimate()
+        self.assertEqual(3, num_unique)
+
+    def test_serialization(self):
+        sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
+
+        serialized = sketch.Serialize()
+        self.assertIsInstance(serialized, bytes)
+
+        deserialized = sketches.KmvSketch.Deserialize(serialized)
+        self.assertIsInstance(deserialized, sketches.KmvSketch)
+
+        num_unique = deserialized.Estimate()
+        self.assertEqual(3, num_unique)
+
+    def test_deserialize_fails_with_exception(self):
+        with self.assertRaisesRegex(RuntimeError, "Failed to parse Kmv sketch"):
+            sketches.KmvSketch.Deserialize("I am no proto")
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/sketches/misragries_sketch_test.py b/tfx_bsl/sketches/misragries_sketch_test.py
index be161890..bdb981ee 100644
--- a/tfx_bsl/sketches/misragries_sketch_test.py
+++ b/tfx_bsl/sketches/misragries_sketch_test.py
@@ -17,370 +17,335 @@
 import pickle
 
 import pyarrow as pa
-from tfx_bsl import sketches
+from absl.testing import absltest, parameterized
 
-from absl.testing import absltest
-from absl.testing import parameterized
+from tfx_bsl import sketches
 
 _NUM_BUCKETS = 128
 
 
-def _create_basic_sketch(
-    items, weights=None, num_buckets=_NUM_BUCKETS, reverse=False
-):
-  order = (
-      sketches.MisraGriesSketch.OrderOnTie.ReverseLexicographical
-      if reverse
-      else sketches.MisraGriesSketch.OrderOnTie.Lexicographical
-  )
-  sketch = sketches.MisraGriesSketch(num_buckets, order_on_tie=order)
-  if weights:
-    sketch.AddValues(items, weights)
-  else:
-    sketch.AddValues(items)
-  return sketch
+def _create_basic_sketch(items, weights=None, num_buckets=_NUM_BUCKETS, reverse=False):
+    order = (
+        sketches.MisraGriesSketch.OrderOnTie.ReverseLexicographical
+        if reverse
+        else sketches.MisraGriesSketch.OrderOnTie.Lexicographical
+    )
+    sketch = sketches.MisraGriesSketch(num_buckets, order_on_tie=order)
+    if weights:
+        sketch.AddValues(items, weights)
+    else:
+        sketch.AddValues(items)
+    return sketch
 
 
 class MisraGriesSketchTest(parameterized.TestCase):
+    @parameterized.named_parameters(
+        ("binary", [b"a", b"a", b"b", b"c", None], pa.binary()),
+        ("large_binary", [b"a", b"a", b"b", b"c"], pa.large_binary()),
+        ("string", ["a", "a", "b", "c", None], pa.string()),
+        ("large_string", ["a", "a", "b", "c"], pa.large_string()),
+    )
+    def test_add_binary_like(self, values, binary_like_type):
+        expected_counts = [
+            {"values": b"a", "counts": 2.0},
+            {"values": b"b", "counts": 1.0},
+            {"values": b"c", "counts": 1.0},
+        ]
+        sketch = _create_basic_sketch(pa.array(values, type=binary_like_type))
+        estimate = sketch.Estimate()
+        estimate.validate(full=True)
+        self.assertEqual(estimate.to_pylist(), expected_counts)
+
+    @parameterized.named_parameters(
+        ("int8", [1, 1, 2, 3, None], pa.int8()),
+        ("int16", [1, 1, 2, 3], pa.int16()),
+        ("int32", [1, 1, 2, 3, None], pa.int32()),
+        ("int64", [1, 1, 2, 3], pa.int64()),
+        ("uint8", [1, 1, 2, 3], pa.uint8()),
+        ("uint16", [1, None, 1, 2, 3], pa.uint16()),
+        ("uint32", [1, 1, 2, 3], pa.uint32()),
+        ("uint64", [1, 1, 2, 3, None], pa.uint64()),
+    )
+    def test_add_integer(self, values, integer_type):
+        expected_counts = [
+            {"values": b"1", "counts": 2.0},
+            {"values": b"2", "counts": 1.0},
+            {"values": b"3", "counts": 1.0},
+        ]
+        sketch = _create_basic_sketch(pa.array(values, type=integer_type))
+        estimate = sketch.Estimate()
+        estimate.validate(full=True)
+        self.assertEqual(estimate.to_pylist(), expected_counts)
+
+    def test_add_weighted_values(self):
+        items = pa.array(["a", "a", "b", "c"], type=pa.string())
+        weights = pa.array([4, 3, 2, 1], type=pa.float32())
+        sketch = _create_basic_sketch(items, weights=weights)
+
+        expected_counts = [
+            {"values": b"a", "counts": 7.0},
+            {"values": b"b", "counts": 2.0},
+            {"values": b"c", "counts": 1.0},
+        ]
+        estimate = sketch.Estimate()
+        estimate.validate(full=True)
+
+        self.assertEqual(estimate.to_pylist(), expected_counts)
+
+    def test_add_invalid_weights(self):
+        items = pa.array(["a", "a", "b", "c"], type=pa.string())
+        weights = pa.array([4, 3, 2, 1], type=pa.int64())
+        with self.assertRaisesRegex(
+            RuntimeError, "INVALID_ARGUMENT: Weight array must be float type."
+        ):
+            _create_basic_sketch(items, weights=weights)
+
+    def test_add_unsupported_type(self):
+        values = pa.array([True, False], pa.bool_())
+        sketch = sketches.MisraGriesSketch(_NUM_BUCKETS)
+        with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED: bool"):
+            sketch.AddValues(values)
+
+    def test_reverse_order(self):
+        items = pa.array(["a", "a", "b", "c"], type=pa.string())
+        sketch = _create_basic_sketch(items, reverse=True)
+        expected_counts = [
+            {"values": b"a", "counts": 2.0},
+            {"values": b"c", "counts": 1.0},
+            {"values": b"b", "counts": 1.0},
+        ]
+        estimate = sketch.Estimate()
+        estimate.validate(full=True)
+
+        self.assertEqual(estimate.to_pylist(), expected_counts)
+
+    def test_replace_invalid_utf8(self):
+        values1 = pa.array(
+            [
+                b"a",
+                b"\x80",  # invalid
+                b"\xc1",  # invalid
+            ]
+        )
+        values2 = pa.array(
+            [
+                b"\xc0\x80",  # invalid
+                b"a",
+            ]
+        )
+        sketch1 = sketches.MisraGriesSketch(
+            _NUM_BUCKETS, invalid_utf8_placeholder=b"<BYTES>"
+        )
+        sketch1.AddValues(values1)
+
+        sketch2 = sketches.MisraGriesSketch(
+            _NUM_BUCKETS, invalid_utf8_placeholder=b"<BYTES>"
+        )
+        sketch2.AddValues(values2)
+
+        serialized1 = sketch1.Serialize()
+        serialized2 = sketch2.Serialize()
+
+        sketch1 = sketches.MisraGriesSketch.Deserialize(serialized1)
+        sketch2 = sketches.MisraGriesSketch.Deserialize(serialized2)
+        sketch1.AddValues(values2)
+        sketch1.Merge(sketch2)
+
+        actual = sketch1.Estimate()
+        actual.validate(full=True)
+        self.assertEqual(
+            actual.to_pylist(),
+            [
+                {"values": b"<BYTES>", "counts": 4.0},
+                {"values": b"a", "counts": 3.0},
+            ],
+        )
+
+    def test_no_replace_invalid_utf8(self):
+        sketch = sketches.MisraGriesSketch(_NUM_BUCKETS)
+        sketch.AddValues(pa.array([b"\x80"]))
+        actual = sketch.Estimate()
+        self.assertEqual(
+            actual.to_pylist(),
+            [
+                {"values": b"\x80", "counts": 1.0},
+            ],
+        )
+
+    def test_large_string_threshold(self):
+        values1 = pa.array(["a", "bbb", "c", "d", "eeff"])
+        values2 = pa.array(["a", "gghh"])
+        sketch1 = sketches.MisraGriesSketch(
+            _NUM_BUCKETS, large_string_threshold=2, large_string_placeholder=b"<LARGE>"
+        )
+        sketch1.AddValues(values1)
+
+        sketch2 = sketches.MisraGriesSketch(
+            _NUM_BUCKETS, large_string_threshold=2, large_string_placeholder=b"<LARGE>"
+        )
+        sketch2.AddValues(values2)
+
+        serialized1 = sketch1.Serialize()
+        serialized2 = sketch2.Serialize()
+
+        sketch1 = sketches.MisraGriesSketch.Deserialize(serialized1)
+        sketch2 = sketches.MisraGriesSketch.Deserialize(serialized2)
+        sketch1.AddValues(values2)
+        sketch1.Merge(sketch2)
+
+        actual = sketch1.Estimate()
+        actual.validate(full=True)
+        self.assertEqual(
+            actual.to_pylist(),
+            [
+                {"values": b"<LARGE>", "counts": 4.0},
+                {"values": b"a", "counts": 3.0},
+                {"values": b"c", "counts": 1.0},
+                {"values": b"d", "counts": 1.0},
+            ],
+        )
+
+    def test_invalid_large_string_replacing_config(self):
+        with self.assertRaisesRegex(
+            RuntimeError,
+            "Must provide both or neither large_string_threshold and "
+            "large_string_placeholder",
+        ):
+            _ = sketches.MisraGriesSketch(_NUM_BUCKETS, large_string_threshold=1024)
+
+        with self.assertRaisesRegex(
+            RuntimeError,
+            "Must provide both or neither large_string_threshold and "
+            "large_string_placeholder",
+        ):
+            _ = sketches.MisraGriesSketch(_NUM_BUCKETS, large_string_placeholder=b"<L>")
+
+    def test_many_uniques(self):
+        # Test that the tail elements with equal counts are not discarded after
+        # `AddValues` call.
+        sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]), num_buckets=2)
+        estimate = sketch.Estimate()
+        estimate.validate(full=True)
+        # Since "b" and "c" have equal counts and neither token has count > 4/2, any
+        # combination is possible.
+        all_counts = [
+            {"values": b"a", "counts": 2.0},
+            {"values": b"b", "counts": 1.0},
+            {"values": b"c", "counts": 1.0},
+        ]
+        self.assertIn(
+            tuple(estimate.to_pylist()), list(itertools.combinations(all_counts, 2))
+        )
+
+    def test_merge(self):
+        sketch1 = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
+        sketch2 = _create_basic_sketch(pa.array(["d", "a"]))
+
+        sketch1.Merge(sketch2)
+        estimate = sketch1.Estimate()
+        estimate.validate(full=True)
+        expected_counts = [
+            {"values": b"a", "counts": 3.0},
+            {"values": b"b", "counts": 1.0},
+            {"values": b"c", "counts": 1.0},
+            {"values": b"d", "counts": 1.0},
+        ]
+
+        self.assertEqual(estimate.to_pylist(), expected_counts)
+
+    def test_merge_equal_to_kth_weights(self):
+        # Test that tail elements with equal counts are not discarded after
+        # `Compress` call.
+        sketch1 = _create_basic_sketch(
+            pa.array(["a"] * 5 + ["b"] * 5 + ["c"] * 4 + ["a"] * 4), num_buckets=3
+        )
+        sketch2 = _create_basic_sketch(pa.array(["d"] * 4 + ["a"] * 2), num_buckets=3)
+        sketch1.Merge(sketch2)
+        estimate = sketch1.Estimate()
+        estimate.validate(full=True)
+        # Since "c" and "d" have equal counts, the last entry may be either.
+        expected_counts1 = [
+            {"values": b"a", "counts": 11.0},
+            {"values": b"b", "counts": 5.0},
+            {"values": b"c", "counts": 4.0},
+        ]
+        expected_counts2 = expected_counts1.copy()
+        expected_counts2[2] = {"values": b"d", "counts": 4.0}
+        self.assertIn(estimate.to_pylist(), [expected_counts1, expected_counts2])
+
+    def test_merge_with_extra_items(self):
+        # Each of these sketches get more values than `num_buckets`. This will
+        # result into removal of less frequent elements from the main buffer and
+        # adding them to a buffer of extra elements.
+        # Here we're testing that merging of sketches having extra elements is
+        # correct and results in a sketch that produces the requested number of
+        # elements.
+        sketch1 = _create_basic_sketch(
+            pa.array(["a"] * 3 + ["b"] * 2 + ["c", "d"]), num_buckets=3
+        )
+        sketch2 = _create_basic_sketch(
+            pa.array(["e"] * 3 + ["f"] * 2 + ["g", "h"]), num_buckets=3
+        )
+        sketch3 = _create_basic_sketch(
+            pa.array(["i"] * 2 + ["j", "k", "l"]), num_buckets=3
+        )
+        sketch1.Merge(sketch2)
+        sketch1.Merge(sketch3)
+        estimate = sketch1.Estimate()
+        estimate.validate(full=True)
+
+        # Due to large number of unique elements (relative to `num_buckets`), the
+        # total estimated count error is 5.
+        def get_expected_counts():
+            for least_frequent_item in [b"b", b"f", b"i"]:
+                yield [
+                    {"values": b"a", "counts": 5.0},
+                    {"values": b"e", "counts": 5.0},
+                    {"values": least_frequent_item, "counts": 5.0},
+                ]
+
+        self.assertIn(estimate.to_pylist(), list(get_expected_counts()))
+
+    def test_picklable(self):
+        sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
+        pickled = pickle.dumps(sketch, 2)
+        self.assertIsInstance(pickled, bytes)
+        unpickled = pickle.loads(pickled)
+        self.assertIsInstance(unpickled, sketches.MisraGriesSketch)
+
+        estimate = unpickled.Estimate()
+        estimate.validate(full=True)
+        expected_counts = [
+            {"values": b"a", "counts": 2.0},
+            {"values": b"b", "counts": 1.0},
+            {"values": b"c", "counts": 1.0},
+        ]
+
+        self.assertEqual(estimate.to_pylist(), expected_counts)
+
+    def test_serialization(self):
+        sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
+
+        serialized = sketch.Serialize()
+        self.assertIsInstance(serialized, bytes)
+
+        deserialized = sketches.MisraGriesSketch.Deserialize(serialized)
+        self.assertIsInstance(deserialized, sketches.MisraGriesSketch)
+
+        estimate = deserialized.Estimate()
+        estimate.validate(full=True)
+        expected_counts = [
+            {"values": b"a", "counts": 2.0},
+            {"values": b"b", "counts": 1.0},
+            {"values": b"c", "counts": 1.0},
+        ]
+
+        self.assertEqual(estimate.to_pylist(), expected_counts)
+
+    def test_deserialize_fails_with_exception(self):
+        with self.assertRaisesRegex(RuntimeError, "Failed to parse MisraGries sketch"):
+            sketches.MisraGriesSketch.Deserialize("I am no proto")
 
-  @parameterized.named_parameters(
-      ("binary", [b"a", b"a", b"b", b"c", None], pa.binary()),
-      ("large_binary", [b"a", b"a", b"b", b"c"], pa.large_binary()),
-      ("string", ["a", "a", "b", "c", None], pa.string()),
-      ("large_string", ["a", "a", "b", "c"], pa.large_string()),
-  )
-  def test_add_binary_like(self, values, binary_like_type):
-    expected_counts = [{
-        "values": b"a",
-        "counts": 2.0
-    }, {
-        "values": b"b",
-        "counts": 1.0
-    }, {
-        "values": b"c",
-        "counts": 1.0
-    }]
-    sketch = _create_basic_sketch(pa.array(values, type=binary_like_type))
-    estimate = sketch.Estimate()
-    estimate.validate(full=True)
-    self.assertEqual(estimate.to_pylist(), expected_counts)
-
-  @parameterized.named_parameters(
-      ("int8", [1, 1, 2, 3, None], pa.int8()),
-      ("int16", [1, 1, 2, 3], pa.int16()),
-      ("int32", [1, 1, 2, 3, None], pa.int32()),
-      ("int64", [1, 1, 2, 3], pa.int64()),
-      ("uint8", [1, 1, 2, 3], pa.uint8()),
-      ("uint16", [1, None, 1, 2, 3], pa.uint16()),
-      ("uint32", [1, 1, 2, 3], pa.uint32()),
-      ("uint64", [1, 1, 2, 3, None], pa.uint64()),
-  )
-  def test_add_integer(self, values, integer_type):
-    expected_counts = [{
-        "values": b"1",
-        "counts": 2.0
-    }, {
-        "values": b"2",
-        "counts": 1.0
-    }, {
-        "values": b"3",
-        "counts": 1.0
-    }]
-    sketch = _create_basic_sketch(pa.array(values, type=integer_type))
-    estimate = sketch.Estimate()
-    estimate.validate(full=True)
-    self.assertEqual(estimate.to_pylist(), expected_counts)
-
-  def test_add_weighted_values(self):
-    items = pa.array(["a", "a", "b", "c"], type=pa.string())
-    weights = pa.array([4, 3, 2, 1], type=pa.float32())
-    sketch = _create_basic_sketch(items, weights=weights)
-
-    expected_counts = [{
-        "values": b"a",
-        "counts": 7.0
-    }, {
-        "values": b"b",
-        "counts": 2.0
-    }, {
-        "values": b"c",
-        "counts": 1.0
-    }]
-    estimate = sketch.Estimate()
-    estimate.validate(full=True)
-
-    self.assertEqual(estimate.to_pylist(), expected_counts)
-
-  def test_add_invalid_weights(self):
-    items = pa.array(["a", "a", "b", "c"], type=pa.string())
-    weights = pa.array([4, 3, 2, 1], type=pa.int64())
-    with self.assertRaisesRegex(
-        RuntimeError, "INVALID_ARGUMENT: Weight array must be float type."):
-      _create_basic_sketch(items, weights=weights)
-
-  def test_add_unsupported_type(self):
-    values = pa.array([True, False], pa.bool_())
-    sketch = sketches.MisraGriesSketch(_NUM_BUCKETS)
-    with self.assertRaisesRegex(RuntimeError, "UNIMPLEMENTED: bool"):
-      sketch.AddValues(values)
-
-  def test_reverse_order(self):
-    items = pa.array(["a", "a", "b", "c"], type=pa.string())
-    sketch = _create_basic_sketch(items, reverse=True)
-    expected_counts = [
-        {"values": b"a", "counts": 2.0},
-        {"values": b"c", "counts": 1.0},
-        {"values": b"b", "counts": 1.0},
-    ]
-    estimate = sketch.Estimate()
-    estimate.validate(full=True)
-
-    self.assertEqual(estimate.to_pylist(), expected_counts)
-
-  def test_replace_invalid_utf8(self):
-    values1 = pa.array([
-        b"a",
-        b"\x80",  # invalid
-        b"\xC1",  # invalid
-    ])
-    values2 = pa.array([
-        b"\xc0\x80",  # invalid
-        b"a"])
-    sketch1 = sketches.MisraGriesSketch(
-        _NUM_BUCKETS,
-        invalid_utf8_placeholder=b"<BYTES>")
-    sketch1.AddValues(values1)
-
-    sketch2 = sketches.MisraGriesSketch(
-        _NUM_BUCKETS,
-        invalid_utf8_placeholder=b"<BYTES>")
-    sketch2.AddValues(values2)
-
-    serialized1 = sketch1.Serialize()
-    serialized2 = sketch2.Serialize()
-
-    sketch1 = sketches.MisraGriesSketch.Deserialize(serialized1)
-    sketch2 = sketches.MisraGriesSketch.Deserialize(serialized2)
-    sketch1.AddValues(values2)
-    sketch1.Merge(sketch2)
-
-    actual = sketch1.Estimate()
-    actual.validate(full=True)
-    self.assertEqual(actual.to_pylist(), [
-        {"values": b"<BYTES>", "counts": 4.0},
-        {"values": b"a", "counts": 3.0},
-    ])
-
-  def test_no_replace_invalid_utf8(self):
-    sketch = sketches.MisraGriesSketch(
-        _NUM_BUCKETS)
-    sketch.AddValues(pa.array([b"\x80"]))
-    actual = sketch.Estimate()
-    self.assertEqual(actual.to_pylist(), [
-        {"values": b"\x80", "counts": 1.0},
-    ])
-
-  def test_large_string_threshold(self):
-    values1 = pa.array(["a", "bbb", "c", "d", "eeff"])
-    values2 = pa.array(["a", "gghh"])
-    sketch1 = sketches.MisraGriesSketch(
-        _NUM_BUCKETS,
-        large_string_threshold=2,
-        large_string_placeholder=b"<LARGE>")
-    sketch1.AddValues(values1)
-
-    sketch2 = sketches.MisraGriesSketch(
-        _NUM_BUCKETS,
-        large_string_threshold=2,
-        large_string_placeholder=b"<LARGE>")
-    sketch2.AddValues(values2)
-
-    serialized1 = sketch1.Serialize()
-    serialized2 = sketch2.Serialize()
-
-    sketch1 = sketches.MisraGriesSketch.Deserialize(serialized1)
-    sketch2 = sketches.MisraGriesSketch.Deserialize(serialized2)
-    sketch1.AddValues(values2)
-    sketch1.Merge(sketch2)
-
-    actual = sketch1.Estimate()
-    actual.validate(full=True)
-    self.assertEqual(actual.to_pylist(), [
-        {"values": b"<LARGE>", "counts": 4.0},
-        {"values": b"a", "counts": 3.0},
-        {"values": b"c", "counts": 1.0},
-        {"values": b"d", "counts": 1.0},
-    ])
-
-  def test_invalid_large_string_replacing_config(self):
-    with self.assertRaisesRegex(
-        RuntimeError,
-        "Must provide both or neither large_string_threshold and "
-        "large_string_placeholder"):
-      _ = sketches.MisraGriesSketch(_NUM_BUCKETS, large_string_threshold=1024)
-
-    with self.assertRaisesRegex(
-        RuntimeError,
-        "Must provide both or neither large_string_threshold and "
-        "large_string_placeholder"):
-      _ = sketches.MisraGriesSketch(
-          _NUM_BUCKETS, large_string_placeholder=b"<L>")
-
-  def test_many_uniques(self):
-    # Test that the tail elements with equal counts are not discarded after
-    # `AddValues` call.
-    sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]), num_buckets=2)
-    estimate = sketch.Estimate()
-    estimate.validate(full=True)
-    # Since "b" and "c" have equal counts and neither token has count > 4/2, any
-    # combination is possible.
-    all_counts = [{
-        "values": b"a",
-        "counts": 2.0
-    }, {
-        "values": b"b",
-        "counts": 1.0
-    }, {
-        "values": b"c",
-        "counts": 1.0
-    }]
-    self.assertIn(
-        tuple(estimate.to_pylist()),
-        list(itertools.combinations(all_counts, 2)))
-
-  def test_merge(self):
-    sketch1 = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
-    sketch2 = _create_basic_sketch(pa.array(["d", "a"]))
-
-    sketch1.Merge(sketch2)
-    estimate = sketch1.Estimate()
-    estimate.validate(full=True)
-    expected_counts = [{
-        "values": b"a",
-        "counts": 3.0
-    }, {
-        "values": b"b",
-        "counts": 1.0
-    }, {
-        "values": b"c",
-        "counts": 1.0
-    }, {
-        "values": b"d",
-        "counts": 1.0
-    }]
-
-    self.assertEqual(estimate.to_pylist(), expected_counts)
-
-  def test_merge_equal_to_kth_weights(self):
-    # Test that tail elements with equal counts are not discarded after
-    # `Compress` call.
-    sketch1 = _create_basic_sketch(
-        pa.array(["a"] * 5 + ["b"] * 5 + ["c"] * 4 + ["a"] * 4), num_buckets=3)
-    sketch2 = _create_basic_sketch(
-        pa.array(["d"] * 4 + ["a"] * 2), num_buckets=3)
-    sketch1.Merge(sketch2)
-    estimate = sketch1.Estimate()
-    estimate.validate(full=True)
-    # Since "c" and "d" have equal counts, the last entry may be either.
-    expected_counts1 = [{
-        "values": b"a",
-        "counts": 11.0
-    }, {
-        "values": b"b",
-        "counts": 5.0
-    }, {
-        "values": b"c",
-        "counts": 4.0
-    }]
-    expected_counts2 = expected_counts1.copy()
-    expected_counts2[2] = {"values": b"d", "counts": 4.0}
-    self.assertIn(estimate.to_pylist(), [expected_counts1, expected_counts2])
-
-  def test_merge_with_extra_items(self):
-    # Each of these sketches get more values than `num_buckets`. This will
-    # result into removal of less frequent elements from the main buffer and
-    # adding them to a buffer of extra elements.
-    # Here we're testing that merging of sketches having extra elements is
-    # correct and results in a sketch that produces the requested number of
-    # elements.
-    sketch1 = _create_basic_sketch(
-        pa.array(["a"] * 3 + ["b"] * 2 + ["c", "d"]), num_buckets=3)
-    sketch2 = _create_basic_sketch(
-        pa.array(["e"] * 3 + ["f"] * 2 + ["g", "h"]), num_buckets=3)
-    sketch3 = _create_basic_sketch(
-        pa.array(["i"] * 2 + ["j", "k", "l"]), num_buckets=3)
-    sketch1.Merge(sketch2)
-    sketch1.Merge(sketch3)
-    estimate = sketch1.Estimate()
-    estimate.validate(full=True)
-
-    # Due to large number of unique elements (relative to `num_buckets`), the
-    # total estimated count error is 5.
-    def get_expected_counts():
-      for least_frequent_item in [b"b", b"f", b"i"]:
-        yield [{
-            "values": b"a",
-            "counts": 5.0
-        }, {
-            "values": b"e",
-            "counts": 5.0
-        }, {
-            "values": least_frequent_item,
-            "counts": 5.0
-        }]
-
-    self.assertIn(estimate.to_pylist(), list(get_expected_counts()))
-
-  def test_picklable(self):
-    sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
-    pickled = pickle.dumps(sketch, 2)
-    self.assertIsInstance(pickled, bytes)
-    unpickled = pickle.loads(pickled)
-    self.assertIsInstance(unpickled, sketches.MisraGriesSketch)
-
-    estimate = unpickled.Estimate()
-    estimate.validate(full=True)
-    expected_counts = [{
-        "values": b"a",
-        "counts": 2.0
-    }, {
-        "values": b"b",
-        "counts": 1.0
-    }, {
-        "values": b"c",
-        "counts": 1.0
-    }]
-
-    self.assertEqual(estimate.to_pylist(), expected_counts)
-
-  def test_serialization(self):
-    sketch = _create_basic_sketch(pa.array(["a", "b", "c", "a"]))
-
-    serialized = sketch.Serialize()
-    self.assertIsInstance(serialized, bytes)
-
-    deserialized = sketches.MisraGriesSketch.Deserialize(serialized)
-    self.assertIsInstance(deserialized, sketches.MisraGriesSketch)
-
-    estimate = deserialized.Estimate()
-    estimate.validate(full=True)
-    expected_counts = [{
-        "values": b"a",
-        "counts": 2.0
-    }, {
-        "values": b"b",
-        "counts": 1.0
-    }, {
-        "values": b"c",
-        "counts": 1.0
-    }]
-
-    self.assertEqual(estimate.to_pylist(), expected_counts)
-
-  def test_deserialize_fails_with_exception(self):
-    with self.assertRaisesRegex(RuntimeError,
-                                "Failed to parse MisraGries sketch"):
-      sketches.MisraGriesSketch.Deserialize("I am no proto")
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/sketches/quantiles_sketch_test.py b/tfx_bsl/sketches/quantiles_sketch_test.py
index 4143b362..07978d28 100644
--- a/tfx_bsl/sketches/quantiles_sketch_test.py
+++ b/tfx_bsl/sketches/quantiles_sketch_test.py
@@ -19,10 +19,9 @@
 
 import numpy as np
 import pyarrow as pa
-from tfx_bsl import sketches
+from absl.testing import absltest, parameterized
 
-from absl.testing import absltest
-from absl.testing import parameterized
+from tfx_bsl import sketches
 
 _QUANTILES_CUMULATIVE_WEIGHTS_TEST_CASES = [
     dict(
@@ -34,7 +33,8 @@
         ],
         expected=[[1, 61, 121, 181, 241, 300]],
         expected_weights=[[1, 61, 121, 181, 241, 300]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="unweighted_lumpy",
         values=[
@@ -44,7 +44,8 @@
         ],
         expected=[[1, 2, 3, 4]],
         expected_weights=[[2, 8, 10, 12]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="more_quantiles_than_values",
         values=[
@@ -52,7 +53,8 @@
         ],
         expected=[[0, 0, 0, 0]],
         expected_weights=[[100, 100, 100, 100]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="unweighted_elementwise",
         values=[
@@ -60,15 +62,22 @@
             pa.array(np.linspace(101, 600, 500, dtype=np.float64)),
             pa.array(np.linspace(201, 700, 500, dtype=np.float64)),
         ],
-        expected=[[1, 201, 301, 401, 501, 696], [2, 202, 302, 402, 502, 697],
-                  [3, 203, 303, 403, 503, 698], [4, 204, 304, 404, 504, 699],
-                  [5, 205, 305, 405, 505, 700]],
-        expected_weights=[[1, 63, 123, 183, 242, 300],
-                          [1, 63, 123, 183, 242, 300],
-                          [1, 63, 123, 183, 242, 300],
-                          [1, 63, 123, 183, 242, 300],
-                          [1, 63, 123, 183, 242, 300]],
-        num_streams=5),
+        expected=[
+            [1, 201, 301, 401, 501, 696],
+            [2, 202, 302, 402, 502, 697],
+            [3, 203, 303, 403, 503, 698],
+            [4, 204, 304, 404, 504, 699],
+            [5, 205, 305, 405, 505, 700],
+        ],
+        expected_weights=[
+            [1, 63, 123, 183, 242, 300],
+            [1, 63, 123, 183, 242, 300],
+            [1, 63, 123, 183, 242, 300],
+            [1, 63, 123, 183, 242, 300],
+            [1, 63, 123, 183, 242, 300],
+        ],
+        num_streams=5,
+    ),
     dict(
         testcase_name="weighted",
         values=[
@@ -82,7 +91,8 @@
             pa.array([3] * 100, type=pa.float64()),
         ],
         expected=[[1, 111, 171, 221, 261, 300]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="weighted_elementwise",
         values=[
@@ -95,24 +105,31 @@
             pa.array([2] * 100, type=pa.float64()),
             pa.array([3] * 100, type=pa.float64()),
         ],
-        expected=[[1, 231, 331, 431, 541, 696], [2, 232, 332, 432, 542, 697],
-                  [3, 233, 333, 433, 543, 698], [4, 234, 334, 434, 544, 699],
-                  [5, 235, 335, 435, 545, 700]],
-        expected_weights=[[1, 122, 242, 362, 485, 600],
-                          [1, 122, 242, 362, 485, 600],
-                          [1, 122, 242, 362, 485, 600],
-                          [1, 122, 242, 362, 485, 600],
-                          [1, 122, 242, 362, 485, 600]],
-        num_streams=5),
+        expected=[
+            [1, 231, 331, 431, 541, 696],
+            [2, 232, 332, 432, 542, 697],
+            [3, 233, 333, 433, 543, 698],
+            [4, 234, 334, 434, 544, 699],
+            [5, 235, 335, 435, 545, 700],
+        ],
+        expected_weights=[
+            [1, 122, 242, 362, 485, 600],
+            [1, 122, 242, 362, 485, 600],
+            [1, 122, 242, 362, 485, 600],
+            [1, 122, 242, 362, 485, 600],
+            [1, 122, 242, 362, 485, 600],
+        ],
+        num_streams=5,
+    ),
     dict(
         testcase_name="infinity",
         values=[
-            pa.array(
-                [1.0, 2.0, np.inf, np.inf, -np.inf, 3.0, 4.0, 5.0, -np.inf]),
+            pa.array([1.0, 2.0, np.inf, np.inf, -np.inf, 3.0, 4.0, 5.0, -np.inf]),
             pa.array([1.0, np.inf, -np.inf]),
         ],
         expected=[[-np.inf, -np.inf, 1, 4, np.inf, np.inf]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="null",
         values=[
@@ -123,7 +140,8 @@
         ],
         expected=[[1, 61, 121, 181, 241, 300]],
         expected_weights=[[1, 61, 121, 181, 241, 300]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="nan",
         values=[
@@ -134,7 +152,8 @@
         ],
         expected=[[1, 61, 121, 181, 241, 300]],
         expected_weights=[[1, 61, 121, 181, 241, 300]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="nan_weighted",
         values=[
@@ -151,7 +170,8 @@
         ],
         expected=[[1, 111, 171, 221, 261, 300]],
         expected_weights=[[1, 122, 242, 363, 483, 600]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="int",
         values=[
@@ -160,7 +180,8 @@
             pa.array(np.linspace(201, 300, 100, dtype=np.int32)),
         ],
         expected=[[1, 61, 121, 181, 241, 300]],
-        num_streams=1),
+        num_streams=1,
+    ),
     dict(
         testcase_name="negative_weights",
         values=[
@@ -176,304 +197,323 @@
             pa.array([0, -1]),
         ],
         expected=[[1, 111, 171, 221, 261, 300]],
-        num_streams=1),
+        num_streams=1,
+    ),
 ]
 _QUANTILES_TEST_CASES = copy.deepcopy(_QUANTILES_CUMULATIVE_WEIGHTS_TEST_CASES)
 for tc in _QUANTILES_TEST_CASES:
-  tc.pop("expected_weights", None)
+    tc.pop("expected_weights", None)
 
 _MAX_NUM_ELEMENTS = [2**10, 2**14, 2**18, 2**19]
 _EPS = [0.5, 0.01, 0.001, 0.0001, 0.000001]
 _NUM_QUANTILES = [5**4, 3**6, 1000, 2**10]
 
-_ACCURACY_TEST_CASES = list(
-    itertools.product(_MAX_NUM_ELEMENTS, _EPS, _NUM_QUANTILES))
+_ACCURACY_TEST_CASES = list(itertools.product(_MAX_NUM_ELEMENTS, _EPS, _NUM_QUANTILES))
 
 
 def _add_values(sketch, value, weight):
-  if weight is None:
-    sketch.AddValues(value)
-  else:
-    sketch.AddValues(value, weight)
+    if weight is None:
+        sketch.AddValues(value)
+    else:
+        sketch.AddValues(value, weight)
 
 
 def _pickle_roundtrip(s):
-  return pickle.loads(pickle.dumps(s))
+    return pickle.loads(pickle.dumps(s))
 
 
 def _get_cumulative_weight(values, quantiles, weights):
-  if weights is None:
-    weights = np.ones(values.shape[0])
-  cum_weight = []
-  for q in quantiles:
-    cum_weight.append(weights[values <= q].sum())
-  return np.array(cum_weight)
+    if weights is None:
+        weights = np.ones(values.shape[0])
+    cum_weight = []
+    for q in quantiles:
+        cum_weight.append(weights[values <= q].sum())
+    return np.array(cum_weight)
 
 
 def _partition_streams(values, num_streams):
-  result = []
-  for v in values:
-    result.append(v.to_numpy(zero_copy_only=True))
-  result = np.concatenate(result)
-  stream_results = []
-  for stream in range(num_streams):
-    stream_results.append(result[stream::num_streams])
-  return stream_results
+    result = []
+    for v in values:
+        result.append(v.to_numpy(zero_copy_only=True))
+    result = np.concatenate(result)
+    stream_results = []
+    for stream in range(num_streams):
+        stream_results.append(result[stream::num_streams])
+    return stream_results
 
 
 class QuantilesSketchTest(parameterized.TestCase):
-
-  def assert_quantiles_accuracy(self, quantiles, cdf, eps, cumulative_weights):
-    # Helper function to validate quantiles accuracy given a cdf function.
-    # This function assumes that quantiles input values are of the form
-    # range(N). Note that this function also validates order of quantiles since
-    # their cdf values are compared to ordered expected levels.
-    num_quantiles = len(quantiles)
-    expected_cumulative_weights = [
-        i / (num_quantiles - 1) for i in range(num_quantiles)
-    ]
-    for expected_level, quantile, level in zip(expected_cumulative_weights,
-                                               quantiles, cumulative_weights):
-      level /= sum(cumulative_weights)
-      quantile_cdf = cdf(quantile)
-      left_cdf = cdf(quantile - 1)
-      right_cdf = cdf(quantile + 1)
-      error_msg = (
-          "Accuracy of the given quantile is not sufficient, "
-          "quantile={} of expected level {}, its cdf is {}; cdf of a value to "
-          "the left is {}, to the right is {}. Error bound = {}.").format(
-              quantile, level, quantile_cdf, left_cdf, right_cdf, eps)
-      self.assertTrue(
-          abs(expected_level - cdf(quantile)) < eps or
-          left_cdf < expected_level < right_cdf or
-          (expected_level == 0 and left_cdf == 0), error_msg)
-    # Check that the true CDF and the reported CDF from cumulative weights are
-    # close up to eps.
-    full_true_cdf = np.array([cdf(q) for q in quantiles])
-    full_reported_cdf = np.array(cumulative_weights)
-    full_reported_cdf /= full_reported_cdf.max()
-    np.testing.assert_allclose(full_reported_cdf, full_true_cdf, atol=eps)
-
-  def test_quantiles_sketch_init(self):
-    with self.assertRaisesRegex(RuntimeError, "eps must be positive"):
-      _ = sketches.QuantilesSketch(0, 1 << 32, 1)
-
-    with self.assertRaisesRegex(RuntimeError, "max_num_elements must be >= 1."):
-      _ = sketches.QuantilesSketch(0.0001, 0, 1)
-
-    with self.assertRaisesRegex(RuntimeError, "num_streams must be >= 1."):
-      _ = sketches.QuantilesSketch(0.0001, 1 << 32, 0)
-
-    _ = sketches.QuantilesSketch(0.0001, 1 << 32, 1)
-
-  @parameterized.named_parameters(*_QUANTILES_TEST_CASES)
-  def test_quantiles(
-      self,
-      values,
-      expected,
-      num_streams,
-      weights=None,
-  ):
-    s = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
-    if weights is None:
-      weights = [None] * len(values)
-    for value, weight in zip(values, weights):
-      _add_values(s, value, weight)
-
-    result = s.GetQuantiles(len(expected[0]) - 1).to_pylist()
-    np.testing.assert_almost_equal(expected, result)
-
-  @parameterized.named_parameters(*_QUANTILES_CUMULATIVE_WEIGHTS_TEST_CASES)
-  def test_cumulative_weights(self,
-                              values,
-                              expected,
-                              num_streams,
-                              expected_weights=None,
-                              weights=None):
-    s = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
-    if weights is None:
-      weights = [None] * len(values)
-    for value, weight in zip(values, weights):
-      _add_values(s, value, weight)
-    result_quantiles, result_weights = s.GetQuantilesAndCumulativeWeights(
-        len(expected[0]) - 1)
-    result_quantiles = result_quantiles.to_pylist()
-    result_weights = result_weights.to_pylist()
-
-    if expected_weights is None:
-      # Expected cumulative weights not provided, so compute them.
-      assert num_streams == 1
-      values_streams = _partition_streams(values, num_streams)
-      if weights[0] is None:
-        weights_streams = num_streams * [None]
-      else:
-        weights_streams = _partition_streams(weights, num_streams)
-      for stream_idx in range(num_streams):
-        expected_quantiles = expected[stream_idx]
-        cumul_weights = _get_cumulative_weight(values_streams[stream_idx],
-                                               expected_quantiles,
-                                               weights_streams[stream_idx])
-        np.testing.assert_almost_equal(result_weights[stream_idx],
-                                       cumul_weights)
-    else:
-      np.testing.assert_almost_equal(expected_weights, result_weights)
-
-  @parameterized.named_parameters(*_QUANTILES_TEST_CASES)
-  def test_pickle(self, values, expected, num_streams, weights=None):
-    s = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
-    if weights is None:
-      weights = [None] * len(values)
-    for value, weight in zip(values, weights):
-      _add_values(s, value, weight)
-    pickled = pickle.dumps(s)
-    self.assertIsInstance(pickled, bytes)
-    unpickled = pickle.loads(pickled)
-    self.assertIsInstance(unpickled, sketches.QuantilesSketch)
-    result = unpickled.GetQuantiles(len(expected[0]) - 1).to_pylist()
-    np.testing.assert_almost_equal(expected, result)
-
-  @parameterized.named_parameters(*_QUANTILES_TEST_CASES)
-  def test_merge(self, values, expected, num_streams, weights=None):
-    if weights is None:
-      weights = [None] * len(values)
-    s1 = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
-    for value, weight in zip(values[:len(values) // 2],
-                             weights[:len(weights) // 2]):
-      _add_values(s1, value, weight)
-    s2 = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
-    for value, weight in zip(values[len(values) // 2:],
-                             weights[len(weights) // 2:]):
-      _add_values(s2, value, weight)
-
-    s1 = _pickle_roundtrip(s1)
-    s2 = _pickle_roundtrip(s2)
-    s1.Merge(s2)
-
-    result = s1.GetQuantiles(len(expected[0]) - 1).to_pylist()
-    np.testing.assert_almost_equal(expected, result)
-
-  @parameterized.named_parameters(*_QUANTILES_TEST_CASES)
-  def test_compact(self, values, expected, num_streams, weights=None):
-    s = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
-    num_values = len(values)
-    if weights is None:
-      weights = [None] * num_values
-    for value, weight in zip(values[:num_values // 2],
-                             weights[:num_values // 2]):
-      _add_values(s, value, weight)
-    s.Compact()
-    for value, weight in zip(values[num_values // 2:],
-                             weights[num_values // 2:]):
-      _add_values(s, value, weight)
-    s.Compact()
-
-    result = s.GetQuantiles(len(expected[0]) - 1).to_pylist()
-    np.testing.assert_almost_equal(expected, result)
-
-  @parameterized.parameters(*_ACCURACY_TEST_CASES)
-  def test_accuracy(self, max_num_elements, eps, num_quantiles):
-    s = sketches.QuantilesSketch(eps, max_num_elements, 1)
-    values = pa.array(reversed(range(max_num_elements)))
-    weights = pa.array(range(max_num_elements))
-    total_weight = (max_num_elements - 1) * max_num_elements / 2
-
-    def cdf(x):
-      left_weight = (2 * (max_num_elements - 1) - x) * (x + 1) / 2
-      return left_weight / total_weight
-
-    _add_values(s, values, weights)
-    quantiles, cumulative_weights = s.GetQuantilesAndCumulativeWeights(
-        num_quantiles - 1)
-    quantiles = quantiles.to_pylist()[0]
-    cumulative_weights = cumulative_weights.to_pylist()[0]
-    self.assert_quantiles_accuracy(quantiles, cdf, eps, cumulative_weights)
-    self.assertEqual(total_weight, cumulative_weights[-1])
-
-  @parameterized.parameters(*_ACCURACY_TEST_CASES)
-  def test_accuracy_after_pickle(self, max_num_elements, eps, num_quantiles):
-    s = sketches.QuantilesSketch(eps, max_num_elements, 1)
-    values = pa.array(reversed(range(max_num_elements)))
-    weights = pa.array(range(max_num_elements))
-    total_weight = (max_num_elements - 1) * max_num_elements / 2
-
-    def cdf(x):
-      left_weight = (2 * (max_num_elements - 1) - x) * (x + 1) / 2
-      return left_weight / total_weight
-
-    _add_values(s, values[:max_num_elements // 2],
-                weights[:max_num_elements // 2])
-    s = _pickle_roundtrip(s)
-    _add_values(s, values[max_num_elements // 2:],
-                weights[max_num_elements // 2:])
-    s = _pickle_roundtrip(s)
-    quantiles, cumulative_weights = s.GetQuantilesAndCumulativeWeights(
-        num_quantiles - 1)
-    quantiles = quantiles.to_pylist()[0]
-    cumulative_weights = cumulative_weights.to_pylist()[0]
-    self.assert_quantiles_accuracy(quantiles, cdf, eps, cumulative_weights)
-    self.assertEqual(total_weight, cumulative_weights[-1])
-
-  @parameterized.parameters(*_ACCURACY_TEST_CASES)
-  def test_accuracy_after_merge(self, max_num_elements, eps, num_quantiles):
-    s1 = sketches.QuantilesSketch(eps, max_num_elements, 1)
-    s2 = sketches.QuantilesSketch(eps, max_num_elements, 1)
-    s3 = sketches.QuantilesSketch(eps, max_num_elements, 1)
-    values = pa.array(reversed(range(max_num_elements)))
-    weights = pa.array(range(max_num_elements))
-    total_weight = (max_num_elements - 1) * max_num_elements / 2
-
-    def cdf(x):
-      left_weight = (2 * (max_num_elements - 1) - x) * (x + 1) / 2
-      return left_weight / total_weight
-
-    _add_values(s1, values[:max_num_elements // 10],
-                weights[:max_num_elements // 10])
-    _add_values(s2, values[max_num_elements // 10:max_num_elements // 3],
-                weights[max_num_elements // 10:max_num_elements // 3])
-    _add_values(s3, values[max_num_elements // 3:],
-                weights[max_num_elements // 3:])
-    s2.Merge(s3)
-    s1.Merge(s2)
-    quantiles, cumulative_weights = s1.GetQuantilesAndCumulativeWeights(
-        num_quantiles - 1)
-    quantiles = quantiles.to_pylist()[0]
-    cumulative_weights = cumulative_weights.to_pylist()[0]
-    self.assert_quantiles_accuracy(quantiles, cdf, eps, cumulative_weights)
-    self.assertEqual(total_weight, cumulative_weights[-1])
-
-  @parameterized.parameters(*_ACCURACY_TEST_CASES)
-  def test_accuracy_after_compact(self, max_num_elements, eps, num_quantiles):
-    s1 = sketches.QuantilesSketch(eps, max_num_elements, 1)
-    s2 = sketches.QuantilesSketch(eps, max_num_elements, 1)
-    s3 = sketches.QuantilesSketch(eps, max_num_elements, 1)
-    values = pa.array(reversed(range(max_num_elements)))
-    weights = pa.array(range(max_num_elements))
-    total_weight = (max_num_elements - 1) * max_num_elements / 2
-
-    def cdf(x):
-      left_weight = (2 * (max_num_elements - 1) - x) * (x + 1) / 2
-      return left_weight / total_weight
-
-    _add_values(s1, values[:max_num_elements // 10],
-                weights[:max_num_elements // 10])
-    _add_values(s2, values[max_num_elements // 10:max_num_elements // 3],
-                weights[max_num_elements // 10:max_num_elements // 3])
-    _add_values(s3, values[max_num_elements // 3:],
-                weights[max_num_elements // 3:])
-    s2.Compact()
-    s3.Compact()
-    s2.Merge(s3)
-    s2.Compact()
-    s1.Compact()
-    s1.Merge(s2)
-    s1.Compact()
-    quantiles, cumulative_weights = s1.GetQuantilesAndCumulativeWeights(
-        num_quantiles - 1)
-    quantiles = quantiles.to_pylist()[0]
-    cumulative_weights = cumulative_weights.to_pylist()[0]
-    self.assert_quantiles_accuracy(quantiles, cdf, eps, cumulative_weights)
-    self.assertEqual(total_weight, cumulative_weights[-1])
+    def assert_quantiles_accuracy(self, quantiles, cdf, eps, cumulative_weights):
+        # Helper function to validate quantiles accuracy given a cdf function.
+        # This function assumes that quantiles input values are of the form
+        # range(N). Note that this function also validates order of quantiles since
+        # their cdf values are compared to ordered expected levels.
+        num_quantiles = len(quantiles)
+        expected_cumulative_weights = [
+            i / (num_quantiles - 1) for i in range(num_quantiles)
+        ]
+        for expected_level, quantile, level in zip(
+            expected_cumulative_weights, quantiles, cumulative_weights
+        ):
+            level /= sum(cumulative_weights)
+            quantile_cdf = cdf(quantile)
+            left_cdf = cdf(quantile - 1)
+            right_cdf = cdf(quantile + 1)
+            error_msg = (
+                "Accuracy of the given quantile is not sufficient, "
+                f"quantile={quantile} of expected level {level}, its cdf is {quantile_cdf}; cdf of a value to "
+                f"the left is {left_cdf}, to the right is {right_cdf}. Error bound = {eps}."
+            )
+            self.assertTrue(
+                abs(expected_level - cdf(quantile)) < eps
+                or left_cdf < expected_level < right_cdf
+                or (expected_level == 0 and left_cdf == 0),
+                error_msg,
+            )
+        # Check that the true CDF and the reported CDF from cumulative weights are
+        # close up to eps.
+        full_true_cdf = np.array([cdf(q) for q in quantiles])
+        full_reported_cdf = np.array(cumulative_weights)
+        full_reported_cdf /= full_reported_cdf.max()
+        np.testing.assert_allclose(full_reported_cdf, full_true_cdf, atol=eps)
+
+    def test_quantiles_sketch_init(self):
+        with self.assertRaisesRegex(RuntimeError, "eps must be positive"):
+            _ = sketches.QuantilesSketch(0, 1 << 32, 1)
+
+        with self.assertRaisesRegex(RuntimeError, "max_num_elements must be >= 1."):
+            _ = sketches.QuantilesSketch(0.0001, 0, 1)
+
+        with self.assertRaisesRegex(RuntimeError, "num_streams must be >= 1."):
+            _ = sketches.QuantilesSketch(0.0001, 1 << 32, 0)
+
+        _ = sketches.QuantilesSketch(0.0001, 1 << 32, 1)
+
+    @parameterized.named_parameters(*_QUANTILES_TEST_CASES)
+    def test_quantiles(
+        self,
+        values,
+        expected,
+        num_streams,
+        weights=None,
+    ):
+        s = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
+        if weights is None:
+            weights = [None] * len(values)
+        for value, weight in zip(values, weights):
+            _add_values(s, value, weight)
+
+        result = s.GetQuantiles(len(expected[0]) - 1).to_pylist()
+        np.testing.assert_almost_equal(expected, result)
+
+    @parameterized.named_parameters(*_QUANTILES_CUMULATIVE_WEIGHTS_TEST_CASES)
+    def test_cumulative_weights(
+        self, values, expected, num_streams, expected_weights=None, weights=None
+    ):
+        s = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
+        if weights is None:
+            weights = [None] * len(values)
+        for value, weight in zip(values, weights):
+            _add_values(s, value, weight)
+        result_quantiles, result_weights = s.GetQuantilesAndCumulativeWeights(
+            len(expected[0]) - 1
+        )
+        result_quantiles = result_quantiles.to_pylist()
+        result_weights = result_weights.to_pylist()
+
+        if expected_weights is None:
+            # Expected cumulative weights not provided, so compute them.
+            assert num_streams == 1
+            values_streams = _partition_streams(values, num_streams)
+            if weights[0] is None:
+                weights_streams = num_streams * [None]
+            else:
+                weights_streams = _partition_streams(weights, num_streams)
+            for stream_idx in range(num_streams):
+                expected_quantiles = expected[stream_idx]
+                cumul_weights = _get_cumulative_weight(
+                    values_streams[stream_idx],
+                    expected_quantiles,
+                    weights_streams[stream_idx],
+                )
+                np.testing.assert_almost_equal(
+                    result_weights[stream_idx], cumul_weights
+                )
+        else:
+            np.testing.assert_almost_equal(expected_weights, result_weights)
+
+    @parameterized.named_parameters(*_QUANTILES_TEST_CASES)
+    def test_pickle(self, values, expected, num_streams, weights=None):
+        s = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
+        if weights is None:
+            weights = [None] * len(values)
+        for value, weight in zip(values, weights):
+            _add_values(s, value, weight)
+        pickled = pickle.dumps(s)
+        self.assertIsInstance(pickled, bytes)
+        unpickled = pickle.loads(pickled)
+        self.assertIsInstance(unpickled, sketches.QuantilesSketch)
+        result = unpickled.GetQuantiles(len(expected[0]) - 1).to_pylist()
+        np.testing.assert_almost_equal(expected, result)
+
+    @parameterized.named_parameters(*_QUANTILES_TEST_CASES)
+    def test_merge(self, values, expected, num_streams, weights=None):
+        if weights is None:
+            weights = [None] * len(values)
+        s1 = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
+        for value, weight in zip(
+            values[: len(values) // 2], weights[: len(weights) // 2]
+        ):
+            _add_values(s1, value, weight)
+        s2 = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
+        for value, weight in zip(
+            values[len(values) // 2 :], weights[len(weights) // 2 :]
+        ):
+            _add_values(s2, value, weight)
+
+        s1 = _pickle_roundtrip(s1)
+        s2 = _pickle_roundtrip(s2)
+        s1.Merge(s2)
+
+        result = s1.GetQuantiles(len(expected[0]) - 1).to_pylist()
+        np.testing.assert_almost_equal(expected, result)
+
+    @parameterized.named_parameters(*_QUANTILES_TEST_CASES)
+    def test_compact(self, values, expected, num_streams, weights=None):
+        s = sketches.QuantilesSketch(0.00001, 1 << 32, num_streams)
+        num_values = len(values)
+        if weights is None:
+            weights = [None] * num_values
+        for value, weight in zip(values[: num_values // 2], weights[: num_values // 2]):
+            _add_values(s, value, weight)
+        s.Compact()
+        for value, weight in zip(values[num_values // 2 :], weights[num_values // 2 :]):
+            _add_values(s, value, weight)
+        s.Compact()
+
+        result = s.GetQuantiles(len(expected[0]) - 1).to_pylist()
+        np.testing.assert_almost_equal(expected, result)
+
+    @parameterized.parameters(*_ACCURACY_TEST_CASES)
+    def test_accuracy(self, max_num_elements, eps, num_quantiles):
+        s = sketches.QuantilesSketch(eps, max_num_elements, 1)
+        values = pa.array(reversed(range(max_num_elements)))
+        weights = pa.array(range(max_num_elements))
+        total_weight = (max_num_elements - 1) * max_num_elements / 2
+
+        def cdf(x):
+            left_weight = (2 * (max_num_elements - 1) - x) * (x + 1) / 2
+            return left_weight / total_weight
+
+        _add_values(s, values, weights)
+        quantiles, cumulative_weights = s.GetQuantilesAndCumulativeWeights(
+            num_quantiles - 1
+        )
+        quantiles = quantiles.to_pylist()[0]
+        cumulative_weights = cumulative_weights.to_pylist()[0]
+        self.assert_quantiles_accuracy(quantiles, cdf, eps, cumulative_weights)
+        self.assertEqual(total_weight, cumulative_weights[-1])
+
+    @parameterized.parameters(*_ACCURACY_TEST_CASES)
+    def test_accuracy_after_pickle(self, max_num_elements, eps, num_quantiles):
+        s = sketches.QuantilesSketch(eps, max_num_elements, 1)
+        values = pa.array(reversed(range(max_num_elements)))
+        weights = pa.array(range(max_num_elements))
+        total_weight = (max_num_elements - 1) * max_num_elements / 2
+
+        def cdf(x):
+            left_weight = (2 * (max_num_elements - 1) - x) * (x + 1) / 2
+            return left_weight / total_weight
+
+        _add_values(
+            s, values[: max_num_elements // 2], weights[: max_num_elements // 2]
+        )
+        s = _pickle_roundtrip(s)
+        _add_values(
+            s, values[max_num_elements // 2 :], weights[max_num_elements // 2 :]
+        )
+        s = _pickle_roundtrip(s)
+        quantiles, cumulative_weights = s.GetQuantilesAndCumulativeWeights(
+            num_quantiles - 1
+        )
+        quantiles = quantiles.to_pylist()[0]
+        cumulative_weights = cumulative_weights.to_pylist()[0]
+        self.assert_quantiles_accuracy(quantiles, cdf, eps, cumulative_weights)
+        self.assertEqual(total_weight, cumulative_weights[-1])
+
+    @parameterized.parameters(*_ACCURACY_TEST_CASES)
+    def test_accuracy_after_merge(self, max_num_elements, eps, num_quantiles):
+        s1 = sketches.QuantilesSketch(eps, max_num_elements, 1)
+        s2 = sketches.QuantilesSketch(eps, max_num_elements, 1)
+        s3 = sketches.QuantilesSketch(eps, max_num_elements, 1)
+        values = pa.array(reversed(range(max_num_elements)))
+        weights = pa.array(range(max_num_elements))
+        total_weight = (max_num_elements - 1) * max_num_elements / 2
+
+        def cdf(x):
+            left_weight = (2 * (max_num_elements - 1) - x) * (x + 1) / 2
+            return left_weight / total_weight
+
+        _add_values(
+            s1, values[: max_num_elements // 10], weights[: max_num_elements // 10]
+        )
+        _add_values(
+            s2,
+            values[max_num_elements // 10 : max_num_elements // 3],
+            weights[max_num_elements // 10 : max_num_elements // 3],
+        )
+        _add_values(
+            s3, values[max_num_elements // 3 :], weights[max_num_elements // 3 :]
+        )
+        s2.Merge(s3)
+        s1.Merge(s2)
+        quantiles, cumulative_weights = s1.GetQuantilesAndCumulativeWeights(
+            num_quantiles - 1
+        )
+        quantiles = quantiles.to_pylist()[0]
+        cumulative_weights = cumulative_weights.to_pylist()[0]
+        self.assert_quantiles_accuracy(quantiles, cdf, eps, cumulative_weights)
+        self.assertEqual(total_weight, cumulative_weights[-1])
+
+    @parameterized.parameters(*_ACCURACY_TEST_CASES)
+    def test_accuracy_after_compact(self, max_num_elements, eps, num_quantiles):
+        s1 = sketches.QuantilesSketch(eps, max_num_elements, 1)
+        s2 = sketches.QuantilesSketch(eps, max_num_elements, 1)
+        s3 = sketches.QuantilesSketch(eps, max_num_elements, 1)
+        values = pa.array(reversed(range(max_num_elements)))
+        weights = pa.array(range(max_num_elements))
+        total_weight = (max_num_elements - 1) * max_num_elements / 2
+
+        def cdf(x):
+            left_weight = (2 * (max_num_elements - 1) - x) * (x + 1) / 2
+            return left_weight / total_weight
+
+        _add_values(
+            s1, values[: max_num_elements // 10], weights[: max_num_elements // 10]
+        )
+        _add_values(
+            s2,
+            values[max_num_elements // 10 : max_num_elements // 3],
+            weights[max_num_elements // 10 : max_num_elements // 3],
+        )
+        _add_values(
+            s3, values[max_num_elements // 3 :], weights[max_num_elements // 3 :]
+        )
+        s2.Compact()
+        s3.Compact()
+        s2.Merge(s3)
+        s2.Compact()
+        s1.Compact()
+        s1.Merge(s2)
+        s1.Compact()
+        quantiles, cumulative_weights = s1.GetQuantilesAndCumulativeWeights(
+            num_quantiles - 1
+        )
+        quantiles = quantiles.to_pylist()[0]
+        cumulative_weights = cumulative_weights.to_pylist()[0]
+        self.assert_quantiles_accuracy(quantiles, cdf, eps, cumulative_weights)
+        self.assertEqual(total_weight, cumulative_weights[-1])
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/statistics/__init__.py b/tfx_bsl/statistics/__init__.py
index a753202d..89086707 100644
--- a/tfx_bsl/statistics/__init__.py
+++ b/tfx_bsl/statistics/__init__.py
@@ -13,19 +13,24 @@
 # limitations under the License.
 
 """Module level imports for tfx_bsl.statistics."""
+
 # pylint: disable=unused-import
 # pytype: disable=import-error
 # pylint: disable=g-import-not-at-top
 # See b/148667210 for why the ImportError is ignored.
 try:
-  from tfx_bsl.cc.tfx_bsl_extension.statistics import DatasetListAccumulator
-  from tfx_bsl.cc.tfx_bsl_extension.statistics_sql import EvaluateUnaryStatsPredicate
-  from tfx_bsl.cc.tfx_bsl_extension.statistics_sql import EvaluateBinaryStatsPredicate
+    from tfx_bsl.cc.tfx_bsl_extension.statistics import DatasetListAccumulator
+    from tfx_bsl.cc.tfx_bsl_extension.statistics_sql import (
+        EvaluateBinaryStatsPredicate,
+        EvaluateUnaryStatsPredicate,
+    )
 except ImportError as err:
-  import sys
-  sys.stderr.write("Error importing tfx_bsl_extension.statistics. "
-                   "Some tfx_bsl functionalities are not available: {}"
-                   .format(err))
+    import sys
+
+    sys.stderr.write(
+        "Error importing tfx_bsl_extension.statistics. "
+        f"Some tfx_bsl functionalities are not available: {err}"
+    )
 
 # pylint: enable=g-import-not-at-top
 # pytype: enable=import-error
diff --git a/tfx_bsl/statistics/merge_util.py b/tfx_bsl/statistics/merge_util.py
index 5d6ee114..667306ef 100644
--- a/tfx_bsl/statistics/merge_util.py
+++ b/tfx_bsl/statistics/merge_util.py
@@ -12,10 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Utilities for merging statistics proto shards."""
+
 from typing import Iterable, Iterator
-from tfx_bsl import statistics
+
 from tensorflow_metadata.proto.v0 import statistics_pb2
 
+from tfx_bsl import statistics
+
 #  TODO(b/202910677): Consider removing this file once/if proto caster issues
 #  are resolved.
 
@@ -23,41 +26,45 @@
 def merge_dataset_feature_statistics(
     stats_protos: Iterable[statistics_pb2.DatasetFeatureStatistics],
 ) -> statistics_pb2.DatasetFeatureStatisticsList:
-  """Merge a collection of DatasetFeatureStatistics into a single result.
+    """Merge a collection of DatasetFeatureStatistics into a single result.
 
-  Args:
-    stats_protos: An iterable over DatasetFeatureStatistics shards.
+    Args:
+    ----
+      stats_protos: An iterable over DatasetFeatureStatistics shards.
 
-  Returns:
-    A DatasetFeatureStatisticsList formed by merging all inputs.
-  """
-  acc = statistics.DatasetListAccumulator()
-  for stats_proto in stats_protos:
-    acc.MergeDatasetFeatureStatistics(stats_proto.SerializeToString())
-  res_str = acc.Get()
-  result = statistics_pb2.DatasetFeatureStatisticsList()
-  result.ParseFromString(res_str)
-  return result
+    Returns:
+    -------
+      A DatasetFeatureStatisticsList formed by merging all inputs.
+    """
+    acc = statistics.DatasetListAccumulator()
+    for stats_proto in stats_protos:
+        acc.MergeDatasetFeatureStatistics(stats_proto.SerializeToString())
+    res_str = acc.Get()
+    result = statistics_pb2.DatasetFeatureStatisticsList()
+    result.ParseFromString(res_str)
+    return result
 
 
 def _flatten_stats(
-    stats_protos: Iterable[statistics_pb2.DatasetFeatureStatisticsList]
+    stats_protos: Iterable[statistics_pb2.DatasetFeatureStatisticsList],
 ) -> Iterator[statistics_pb2.DatasetFeatureStatistics]:
-  """Yields DatasetFeatureStatistics in a DatasetFeatureStatisticsList."""
-  for stats_list in stats_protos:
-    for dataset in stats_list.datasets:
-      yield dataset
+    """Yields DatasetFeatureStatistics in a DatasetFeatureStatisticsList."""
+    for stats_list in stats_protos:
+        for dataset in stats_list.datasets:
+            yield dataset
 
 
 def merge_dataset_feature_statistics_list(
     stats_protos: Iterable[statistics_pb2.DatasetFeatureStatisticsList],
 ) -> statistics_pb2.DatasetFeatureStatisticsList:
-  """Merge a collection of DatasetFeatureStatisticsList into a single result.
+    """Merge a collection of DatasetFeatureStatisticsList into a single result.
 
-  Args:
-    stats_protos: An iterable over DatasetFeatureStatisticsList shards.
+    Args:
+    ----
+      stats_protos: An iterable over DatasetFeatureStatisticsList shards.
 
-  Returns:
-    A DatasetFeatureStatisticsList formed by merging all inputs.
-  """
-  return merge_dataset_feature_statistics(_flatten_stats(stats_protos))
+    Returns:
+    -------
+      A DatasetFeatureStatisticsList formed by merging all inputs.
+    """
+    return merge_dataset_feature_statistics(_flatten_stats(stats_protos))
diff --git a/tfx_bsl/statistics/merge_util_test.py b/tfx_bsl/statistics/merge_util_test.py
index 4f6f822b..3dd59b2f 100644
--- a/tfx_bsl/statistics/merge_util_test.py
+++ b/tfx_bsl/statistics/merge_util_test.py
@@ -13,44 +13,47 @@
 # limitations under the License.
 """Tests for tfx_bsl.statistics.DatasetListAccumulator."""
 
-from tfx_bsl.statistics import merge_util
-
 from absl.testing import absltest
 from tensorflow_metadata.proto.v0 import statistics_pb2
 
+from tfx_bsl.statistics import merge_util
 
-class MergeDatasetFeatureStatisticsTest(absltest.TestCase):
 
-  # Basic tests that the python bindings work; more coverage is in
-  # merge_util_test.cc
-  def test_merges_two_inputs(self):
-    values = [
-        statistics_pb2.DatasetFeatureStatistics(name="slice1"),
-        statistics_pb2.DatasetFeatureStatistics(name="slice2")
-    ]
-    result = merge_util.merge_dataset_feature_statistics(values)
-    expected = statistics_pb2.DatasetFeatureStatisticsList()
-    expected.datasets.extend(values)
-    self.assertEqual(expected, result)
+class MergeDatasetFeatureStatisticsTest(absltest.TestCase):
+    # Basic tests that the python bindings work; more coverage is in
+    # merge_util_test.cc
+    def test_merges_two_inputs(self):
+        values = [
+            statistics_pb2.DatasetFeatureStatistics(name="slice1"),
+            statistics_pb2.DatasetFeatureStatistics(name="slice2"),
+        ]
+        result = merge_util.merge_dataset_feature_statistics(values)
+        expected = statistics_pb2.DatasetFeatureStatisticsList()
+        expected.datasets.extend(values)
+        self.assertEqual(expected, result)
 
 
 class MergeDatasetFeatureStatisticsListTest(absltest.TestCase):
+    # Basic tests that the python bindings work; more coverage is in
+    # merge_util_test.cc
+    def test_merges_two_inputs(self):
+        values = [
+            statistics_pb2.DatasetFeatureStatisticsList(
+                datasets=[statistics_pb2.DatasetFeatureStatistics(name="slice1")]
+            ),
+            statistics_pb2.DatasetFeatureStatisticsList(
+                datasets=[statistics_pb2.DatasetFeatureStatistics(name="slice2")]
+            ),
+        ]
+        result = merge_util.merge_dataset_feature_statistics_list(values)
+        expected = statistics_pb2.DatasetFeatureStatisticsList(
+            datasets=[
+                statistics_pb2.DatasetFeatureStatistics(name="slice1"),
+                statistics_pb2.DatasetFeatureStatistics(name="slice2"),
+            ]
+        )
+        self.assertEqual(expected, result)
 
-  # Basic tests that the python bindings work; more coverage is in
-  # merge_util_test.cc
-  def test_merges_two_inputs(self):
-    values = [
-        statistics_pb2.DatasetFeatureStatisticsList(
-            datasets=[statistics_pb2.DatasetFeatureStatistics(name="slice1")]),
-        statistics_pb2.DatasetFeatureStatisticsList(
-            datasets=[statistics_pb2.DatasetFeatureStatistics(name="slice2")])
-    ]
-    result = merge_util.merge_dataset_feature_statistics_list(values)
-    expected = statistics_pb2.DatasetFeatureStatisticsList(datasets=[
-        statistics_pb2.DatasetFeatureStatistics(name="slice1"),
-        statistics_pb2.DatasetFeatureStatistics(name="slice2")
-    ])
-    self.assertEqual(expected, result)
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/telemetry/collection.py b/tfx_bsl/telemetry/collection.py
index ac970443..c886803d 100644
--- a/tfx_bsl/telemetry/collection.py
+++ b/tfx_bsl/telemetry/collection.py
@@ -18,50 +18,52 @@
 
 import apache_beam as beam
 import pyarrow as pa
-
 from tensorflow_metadata.proto.v0 import schema_pb2
 
 # TODO(b/68154497): remove this. # pylint: disable=no-value-for-parameter
 
 
-def _IncrementCounter(element: int, counter_namespace: str,
-                      counter_name: str) -> int:
-  counter = beam.metrics.Metrics.counter(counter_namespace, counter_name)
-  counter.inc(element)
-  return element
+def _IncrementCounter(element: int, counter_namespace: str, counter_name: str) -> int:
+    counter = beam.metrics.Metrics.counter(counter_namespace, counter_name)
+    counter.inc(element)
+    return element
 
 
 @beam.ptransform_fn
-def TrackRecordBatchBytes(dataset: beam.PCollection[pa.RecordBatch],
-                          counter_namespace: str,
-                          counter_name: str) -> beam.PCollection[int]:
-  """Gathers telemetry on input record batch."""
-  counter = beam.metrics.Metrics.counter(counter_namespace, counter_name)
+def TrackRecordBatchBytes(
+    dataset: beam.PCollection[pa.RecordBatch], counter_namespace: str, counter_name: str
+) -> beam.PCollection[int]:
+    """Gathers telemetry on input record batch."""
+    counter = beam.metrics.Metrics.counter(counter_namespace, counter_name)
 
-  def counted_bytes(rb: pa.RecordBatch):
-    result = rb.nbytes
-    counter.inc(result)
-    return result
+    def counted_bytes(rb: pa.RecordBatch):
+        result = rb.nbytes
+        counter.inc(result)
+        return result
 
-  return dataset | "CountedBytes" >> beam.Map(counted_bytes)
+    return dataset | "CountedBytes" >> beam.Map(counted_bytes)
 
 
 def _IncrementTensorRepresentationCounters(
     tensor_representations: Dict[str, schema_pb2.TensorRepresentation],
-    counter_namespace: str):
-  kind_counter = collections.Counter(
-      representation.WhichOneof("kind")
-      for representation in tensor_representations.values())
-  for kind, count in kind_counter.items():
-    _IncrementCounter(count, counter_namespace, kind)
+    counter_namespace: str,
+):
+    kind_counter = collections.Counter(
+        representation.WhichOneof("kind")
+        for representation in tensor_representations.values()
+    )
+    for kind, count in kind_counter.items():
+        _IncrementCounter(count, counter_namespace, kind)
 
 
 @beam.ptransform_fn
 def TrackTensorRepresentations(
-    tensor_representations: beam.PCollection[Dict[
-        str, schema_pb2.TensorRepresentation]],
-    counter_namespace: str) -> beam.PCollection[None]:
-  """Gathers telemetry on TensorRepresentation types."""
-  return (tensor_representations | "IncrementCounters" >> beam.Map(
-      _IncrementTensorRepresentationCounters,
-      counter_namespace=counter_namespace))
+    tensor_representations: beam.PCollection[
+        Dict[str, schema_pb2.TensorRepresentation]
+    ],
+    counter_namespace: str,
+) -> beam.PCollection[None]:
+    """Gathers telemetry on TensorRepresentation types."""
+    return tensor_representations | "IncrementCounters" >> beam.Map(
+        _IncrementTensorRepresentationCounters, counter_namespace=counter_namespace
+    )
diff --git a/tfx_bsl/telemetry/collection_test.py b/tfx_bsl/telemetry/collection_test.py
index 93497f10..b8342349 100644
--- a/tfx_bsl/telemetry/collection_test.py
+++ b/tfx_bsl/telemetry/collection_test.py
@@ -15,85 +15,91 @@
 
 import apache_beam as beam
 import pyarrow as pa
-from tfx_bsl.beam import test_helpers
-from tfx_bsl.telemetry import collection
-
 from absl.testing import absltest
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.beam import test_helpers
+from tfx_bsl.telemetry import collection
 
-class CollectionTest(absltest.TestCase):
 
-  def testTrackRecordBatchBytes(self):
-    inputs = pa.RecordBatch.from_arrays([pa.array([1, 2, 3], type=pa.int32())],
-                                        ["f1"])
-    expected_num_bytes = inputs.nbytes
+class CollectionTest(absltest.TestCase):
+    def testTrackRecordBatchBytes(self):
+        inputs = pa.RecordBatch.from_arrays(
+            [pa.array([1, 2, 3], type=pa.int32())], ["f1"]
+        )
+        expected_num_bytes = inputs.nbytes
 
-    with beam.Pipeline(**test_helpers.make_test_beam_pipeline_kwargs()) as p:
-      _ = (
-          p | beam.Create([inputs])
-          | collection.TrackRecordBatchBytes("TestNamespace",
-                                             "num_bytes_count"))
+        with beam.Pipeline(**test_helpers.make_test_beam_pipeline_kwargs()) as p:
+            _ = (
+                p
+                | beam.Create([inputs])
+                | collection.TrackRecordBatchBytes("TestNamespace", "num_bytes_count")
+            )
 
-    pipeline_result = p.run()
-    result_metrics = pipeline_result.metrics()
-    actual_counter = result_metrics.query(
-        beam.metrics.metric.MetricsFilter().with_name(
-            "num_bytes_count"))["counters"]
-    self.assertLen(actual_counter, 1)
-    self.assertEqual(actual_counter[0].committed, expected_num_bytes)
+        pipeline_result = p.run()
+        result_metrics = pipeline_result.metrics()
+        actual_counter = result_metrics.query(
+            beam.metrics.metric.MetricsFilter().with_name("num_bytes_count")
+        )["counters"]
+        self.assertLen(actual_counter, 1)
+        self.assertEqual(actual_counter[0].committed, expected_num_bytes)
 
-  def testTrackRecordTensorRepresentations(self):
-    num_dense_tensors = 3
-    num_varlen_sparse_tensors = 2
-    num_sparse_tensors = 1
-    num_ragged_tensors = 4
-    tensor_representations = {}
-    for i in range(num_dense_tensors):
-      tensor_representations[f"dense{i}"] = (
-          schema_pb2.TensorRepresentation(
-              dense_tensor=schema_pb2.TensorRepresentation.DenseTensor()))
-    for i in range(num_varlen_sparse_tensors):
-      tensor_representations[f"varlen{i}"] = (
-          schema_pb2.TensorRepresentation(
-              varlen_sparse_tensor=schema_pb2.TensorRepresentation
-              .VarLenSparseTensor()))
-    for i in range(num_sparse_tensors):
-      tensor_representations[f"sparse{i}"] = (
-          schema_pb2.TensorRepresentation(
-              sparse_tensor=schema_pb2.TensorRepresentation.SparseTensor()))
-    for i in range(num_ragged_tensors):
-      tensor_representations[f"ragged{i}"] = (
-          schema_pb2.TensorRepresentation(
-              ragged_tensor=schema_pb2.TensorRepresentation.RaggedTensor()))
+    def testTrackRecordTensorRepresentations(self):
+        num_dense_tensors = 3
+        num_varlen_sparse_tensors = 2
+        num_sparse_tensors = 1
+        num_ragged_tensors = 4
+        tensor_representations = {}
+        for i in range(num_dense_tensors):
+            tensor_representations[f"dense{i}"] = schema_pb2.TensorRepresentation(
+                dense_tensor=schema_pb2.TensorRepresentation.DenseTensor()
+            )
+        for i in range(num_varlen_sparse_tensors):
+            tensor_representations[f"varlen{i}"] = schema_pb2.TensorRepresentation(
+                varlen_sparse_tensor=schema_pb2.TensorRepresentation.VarLenSparseTensor()
+            )
+        for i in range(num_sparse_tensors):
+            tensor_representations[f"sparse{i}"] = schema_pb2.TensorRepresentation(
+                sparse_tensor=schema_pb2.TensorRepresentation.SparseTensor()
+            )
+        for i in range(num_ragged_tensors):
+            tensor_representations[f"ragged{i}"] = schema_pb2.TensorRepresentation(
+                ragged_tensor=schema_pb2.TensorRepresentation.RaggedTensor()
+            )
 
-    expected_counters = {
-        "dense_tensor": num_dense_tensors,
-        "varlen_sparse_tensor": num_varlen_sparse_tensors,
-        "sparse_tensor": num_sparse_tensors,
-        "ragged_tensor": num_ragged_tensors,
-    }
+        expected_counters = {
+            "dense_tensor": num_dense_tensors,
+            "varlen_sparse_tensor": num_varlen_sparse_tensors,
+            "sparse_tensor": num_sparse_tensors,
+            "ragged_tensor": num_ragged_tensors,
+        }
 
-    with beam.Pipeline(**test_helpers.make_test_beam_pipeline_kwargs()) as p:
-      _ = (
-          p | beam.Create([tensor_representations])
-          | collection.TrackTensorRepresentations(
-              counter_namespace="TestNamespace"))
+        with beam.Pipeline(**test_helpers.make_test_beam_pipeline_kwargs()) as p:
+            _ = (
+                p
+                | beam.Create([tensor_representations])
+                | collection.TrackTensorRepresentations(
+                    counter_namespace="TestNamespace"
+                )
+            )
 
-    pipeline_result = p.run()
-    result_metrics = pipeline_result.metrics()
-    for kind, expected_count in expected_counters.items():
-      actual_counter = result_metrics.query(
-          beam.metrics.metric.MetricsFilter().with_name(kind))["counters"]
-      self.assertLen(
-          actual_counter,
-          1,
-          msg=f"Actual and expected lengths of {kind} counter are different.")
-      self.assertEqual(
-          actual_counter[0].committed,
-          expected_count,
-          msg=f"Actual and expected values for {kind} counter are different.")
+        pipeline_result = p.run()
+        result_metrics = pipeline_result.metrics()
+        for kind, expected_count in expected_counters.items():
+            actual_counter = result_metrics.query(
+                beam.metrics.metric.MetricsFilter().with_name(kind)
+            )["counters"]
+            self.assertLen(
+                actual_counter,
+                1,
+                msg=f"Actual and expected lengths of {kind} counter are different.",
+            )
+            self.assertEqual(
+                actual_counter[0].committed,
+                expected_count,
+                msg=f"Actual and expected values for {kind} counter are different.",
+            )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/telemetry/util.py b/tfx_bsl/telemetry/util.py
index b3260f0d..1f0fc96f 100644
--- a/tfx_bsl/telemetry/util.py
+++ b/tfx_bsl/telemetry/util.py
@@ -17,13 +17,12 @@
 
 
 def MakeTfxNamespace(descriptors: Sequence[str]) -> str:
-  """Makes a TFX beam metric namespace from a list of descriptors."""
-  return AppendToNamespace("tfx", descriptors)
+    """Makes a TFX beam metric namespace from a list of descriptors."""
+    return AppendToNamespace("tfx", descriptors)
 
 
-def AppendToNamespace(namespace: str,
-                      descriptors_to_append: Sequence[str]) -> str:
-  """Appends a list of descriptors to a beam metric namespace."""
-  if descriptors_to_append:
-    return namespace + "." + ".".join(descriptors_to_append)
-  return namespace
+def AppendToNamespace(namespace: str, descriptors_to_append: Sequence[str]) -> str:
+    """Appends a list of descriptors to a beam metric namespace."""
+    if descriptors_to_append:
+        return namespace + "." + ".".join(descriptors_to_append)
+    return namespace
diff --git a/tfx_bsl/telemetry/util_test.py b/tfx_bsl/telemetry/util_test.py
index 0657877c..932810ba 100644
--- a/tfx_bsl/telemetry/util_test.py
+++ b/tfx_bsl/telemetry/util_test.py
@@ -13,24 +13,25 @@
 # limitations under the License.
 """Tests for tfx_bsl.telemetry.util."""
 
-from tfx_bsl.telemetry import util
 from absl.testing import absltest
 
+from tfx_bsl.telemetry import util
+
 
 class UtilTest(absltest.TestCase):
+    def testMakeTfxNamespace(self):
+        self.assertEqual("tfx", util.MakeTfxNamespace([]))
+        self.assertEqual(
+            "tfx.some.component", util.MakeTfxNamespace(("some", "component"))
+        )
 
-  def testMakeTfxNamespace(self):
-    self.assertEqual("tfx", util.MakeTfxNamespace([]))
-    self.assertEqual("tfx.some.component",
-                     util.MakeTfxNamespace(("some", "component")))
-
-  def testAppendToNamespace(self):
-    self.assertEqual("some_namespace",
-                     util.AppendToNamespace("some_namespace", []))
-    self.assertEqual(
-        "some_namespace.some.component",
-        util.AppendToNamespace("some_namespace", ["some", "component"]))
+    def testAppendToNamespace(self):
+        self.assertEqual("some_namespace", util.AppendToNamespace("some_namespace", []))
+        self.assertEqual(
+            "some_namespace.some.component",
+            util.AppendToNamespace("some_namespace", ["some", "component"]),
+        )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/test_util/run_all_tests.py b/tfx_bsl/test_util/run_all_tests.py
index 22c03c21..9eb34681 100644
--- a/tfx_bsl/test_util/run_all_tests.py
+++ b/tfx_bsl/test_util/run_all_tests.py
@@ -29,187 +29,195 @@
 import time
 from typing import Dict, List, Optional, Text
 
-from absl import app
-from absl import flags
-from absl import logging
-
+from absl import app, flags, logging
 
 flags.DEFINE_list(
-    "start_dirs", None,
-    "Comma separated directories to recursively search test "
-    "modules from. Required.")
-flags.DEFINE_string("python", None,
-                    "path to Python binary. If not set, use the binary that "
-                    "runs this script.")
+    "start_dirs",
+    None,
+    "Comma separated directories to recursively search test " "modules from. Required.",
+)
+flags.DEFINE_string(
+    "python",
+    None,
+    "path to Python binary. If not set, use the binary that " "runs this script.",
+)
 flags.DEFINE_integer(
-    "parallelism", None, "number of sub-processes to run tests at "
-    " the same time.")
-flags.DEFINE_list("sharded_tests", None,
-                  "Comma separated sharded tests, in the format of "
-                  "\"<file_name>:<num_shards>\". Note that the test must "
-                  "implement Bazel's test sharding protocol.")
+    "parallelism", None, "number of sub-processes to run tests at " " the same time."
+)
+flags.DEFINE_list(
+    "sharded_tests",
+    None,
+    "Comma separated sharded tests, in the format of "
+    '"<file_name>:<num_shards>". Note that the test must '
+    "implement Bazel's test sharding protocol.",
+)
 
 FLAGS = flags.FLAGS
 _TEST_FILENAME_SUFFIX = "_test.py"
 
 
-class _Test(object):
-  """Represents a test (a python executable)."""
-
-  def __init__(self, path: Text, shard_id: int, total_shards: int):
-    self.path = path
-    self.subprocess = None
-    self.stdout = None
-    self.stderr = None
-    self.begin_time = None
-    self.finish_time = None
-    self.shard_id = shard_id
-    self.total_shards = total_shards
-
-  def __str__(self):
-    return "%s [shard %d of %d shard(s)]" % (
-        self.path, self.shard_id, self.total_shards)
-
-  def __lt__(self, other):
-    return (self.path, self.shard_id) < (other.path, other.shard_id)
-
-  def __eq__(self, other):
-    return (self.path, self.shard_id) == (other.path, other.shard_id)
-
-  def __hash__(self):
-    return hash(str(self))
-
-  def Run(self) -> None:
-    """Run the test in a subprocess."""
-    logging.info("Running %s in a subprocess...", self)
-    self.stdout = tempfile.TemporaryFile()
-    self.stderr = tempfile.TemporaryFile()
-    self.begin_time = time.time()
-    env = os.environ.copy()
-    # Give each test program a separate test_tmpdir so they don't overwrite
-    # each other when running in parallel.
-    env["TEST_TMPDIR"] = tempfile.mkdtemp()
-    # Bazel's test sharding protocol:
-    # https://docs.bazel.build/versions/master/test-encyclopedia.html
-    if self.total_shards > 1:
-      env["TEST_TOTAL_SHARDS"] = str(self.total_shards)
-      env["TEST_SHARD_INDEX"] = str(self.shard_id)
-
-    self.subprocess = subprocess.Popen(
-        [_GetPython(), self.path], stdout=self.stdout, stderr=self.stderr,
-        env=env)
-
-  def Finished(self) -> bool:
-    assert self.subprocess is not None
-    finished = self.subprocess.poll() is not None
-    if finished and self.finish_time is None:
-      self.finish_time = time.time()
-    return finished
-
-  def Succeeded(self) -> bool:
-    assert self.subprocess is not None
-    return self.subprocess.poll() == 0
-
-  def PrintLogs(self) -> None:
-    """Prints stdout and stderr outputs of the test."""
-    assert self.Finished()
-    for f, stream_name in (
-        (self.stdout, "STDOUT"), (self.stderr, "STDERR")):
-      f.flush()
-      f.seek(0)
-      # Since we collected binary data, we have to write binary data.
-      encoded = (stream_name.encode(), str(self).encode())
-      sys.stdout.buffer.write(b"BEGIN %s of test %s\n" % encoded)
-      sys.stdout.buffer.write(f.read())
-      sys.stdout.buffer.write(b"END %s of test %s\n" % encoded)
-      sys.stdout.buffer.flush()
-
-
-def _DiscoverTests(root_dirs: List[Text],
-                   test_to_shards: Dict[Text, int]) -> List[_Test]:
-  """Finds tests under `root_dirs`. Creates and returns _Test objects."""
-  result = []
-  for d in root_dirs:
-    for root, _, files in os.walk(d):
-      for f in files:
-        if f.endswith(_TEST_FILENAME_SUFFIX):
-          shards = test_to_shards.get(f, 1)
-          for shard in range(0, shards):
-            result.append(_Test(os.path.join(root, f), shard, shards))
-  logging.info("Discovered %d tests", len(result))
-  return result
+class _Test:
+    """Represents a test (a python executable)."""
+
+    def __init__(self, path: str, shard_id: int, total_shards: int):
+        self.path = path
+        self.subprocess = None
+        self.stdout = None
+        self.stderr = None
+        self.begin_time = None
+        self.finish_time = None
+        self.shard_id = shard_id
+        self.total_shards = total_shards
+
+    def __str__(self):
+        return "%s [shard %d of %d shard(s)]" % (
+            self.path,
+            self.shard_id,
+            self.total_shards,
+        )
+
+    def __lt__(self, other):
+        return (self.path, self.shard_id) < (other.path, other.shard_id)
+
+    def __eq__(self, other):
+        return (self.path, self.shard_id) == (other.path, other.shard_id)
+
+    def __hash__(self):
+        return hash(str(self))
+
+    def Run(self) -> None:
+        """Run the test in a subprocess."""
+        logging.info("Running %s in a subprocess...", self)
+        self.stdout = tempfile.TemporaryFile()
+        self.stderr = tempfile.TemporaryFile()
+        self.begin_time = time.time()
+        env = os.environ.copy()
+        # Give each test program a separate test_tmpdir so they don't overwrite
+        # each other when running in parallel.
+        env["TEST_TMPDIR"] = tempfile.mkdtemp()
+        # Bazel's test sharding protocol:
+        # https://docs.bazel.build/versions/master/test-encyclopedia.html
+        if self.total_shards > 1:
+            env["TEST_TOTAL_SHARDS"] = str(self.total_shards)
+            env["TEST_SHARD_INDEX"] = str(self.shard_id)
+
+        self.subprocess = subprocess.Popen(
+            [_GetPython(), self.path], stdout=self.stdout, stderr=self.stderr, env=env
+        )
+
+    def Finished(self) -> bool:
+        assert self.subprocess is not None
+        finished = self.subprocess.poll() is not None
+        if finished and self.finish_time is None:
+            self.finish_time = time.time()
+        return finished
+
+    def Succeeded(self) -> bool:
+        assert self.subprocess is not None
+        return self.subprocess.poll() == 0
+
+    def PrintLogs(self) -> None:
+        """Prints stdout and stderr outputs of the test."""
+        assert self.Finished()
+        for f, stream_name in ((self.stdout, "STDOUT"), (self.stderr, "STDERR")):
+            f.flush()
+            f.seek(0)
+            # Since we collected binary data, we have to write binary data.
+            encoded = (stream_name.encode(), str(self).encode())
+            sys.stdout.buffer.write(b"BEGIN %s of test %s\n" % encoded)
+            sys.stdout.buffer.write(f.read())
+            sys.stdout.buffer.write(b"END %s of test %s\n" % encoded)
+            sys.stdout.buffer.flush()
+
+
+def _DiscoverTests(root_dirs: List[str], test_to_shards: Dict[str, int]) -> List[_Test]:
+    """Finds tests under `root_dirs`. Creates and returns _Test objects."""
+    result = []
+    for d in root_dirs:
+        for root, _, files in os.walk(d):
+            for f in files:
+                if f.endswith(_TEST_FILENAME_SUFFIX):
+                    shards = test_to_shards.get(f, 1)
+                    for shard in range(0, shards):
+                        result.append(_Test(os.path.join(root, f), shard, shards))
+    logging.info("Discovered %d tests", len(result))
+    return result
 
 
 def _RunTests(tests: List[_Test], parallelism: int) -> bool:
-  """Run tests. Returns True if all tests succeeded."""
-  running_tests = set()
-  finished_tests = set()
-  tests_to_run = sorted(tests, reverse=True)
-  while tests_to_run or running_tests:
-    time.sleep(0.2)  # 200ms
-    updated_finished = set(t for t in running_tests if t.Finished())
-    running_tests = running_tests - updated_finished
-    while tests_to_run and len(running_tests) < parallelism:
-      t = tests_to_run.pop()
-      t.Run()
-      running_tests.add(t)
-
-    newly_finished = updated_finished - finished_tests
-    finished_tests.update(updated_finished)
-    for test in newly_finished:
-      logging.info("%s\t%s\t%.1fs", test,
-                   "PASSED" if test.Succeeded() else "FAILED",
-                   test.finish_time - test.begin_time)
-    if newly_finished:
-      logging.flush()
-
-  failed_tests = sorted([t for t in tests if not t.Succeeded()])
-  logging.info("Ran %d tests. %d failed.", len(tests), len(failed_tests))
-  logging.flush()
-
-  for ft in failed_tests:
-    ft.PrintLogs()
-
-  return not failed_tests
-
-
-def _GetPython() -> Text:
-  return FLAGS.python if FLAGS.python else sys.executable
-
-
-def _ParseShardedTests(
-    sharded_tests_arg: Optional[List[Text]]) -> Dict[Text, int]:
-  """Parses --sharded_tests argument."""
-  result = {}
-  if sharded_tests_arg is None:
+    """Run tests. Returns True if all tests succeeded."""
+    running_tests = set()
+    finished_tests = set()
+    tests_to_run = sorted(tests, reverse=True)
+    while tests_to_run or running_tests:
+        time.sleep(0.2)  # 200ms
+        updated_finished = set(t for t in running_tests if t.Finished())
+        running_tests = running_tests - updated_finished
+        while tests_to_run and len(running_tests) < parallelism:
+            t = tests_to_run.pop()
+            t.Run()
+            running_tests.add(t)
+
+        newly_finished = updated_finished - finished_tests
+        finished_tests.update(updated_finished)
+        for test in newly_finished:
+            logging.info(
+                "%s\t%s\t%.1fs",
+                test,
+                "PASSED" if test.Succeeded() else "FAILED",
+                test.finish_time - test.begin_time,
+            )
+        if newly_finished:
+            logging.flush()
+
+    failed_tests = sorted([t for t in tests if not t.Succeeded()])
+    logging.info("Ran %d tests. %d failed.", len(tests), len(failed_tests))
+    logging.flush()
+
+    for ft in failed_tests:
+        ft.PrintLogs()
+
+    return not failed_tests
+
+
+def _GetPython() -> str:
+    return FLAGS.python if FLAGS.python else sys.executable
+
+
+def _ParseShardedTests(sharded_tests_arg: Optional[List[str]]) -> Dict[str, int]:
+    """Parses --sharded_tests argument."""
+    result = {}
+    if sharded_tests_arg is None:
+        return result
+    for arg in sharded_tests_arg:
+        [file_name, num_shards_str] = arg.split(":")
+        num_shards = int(num_shards_str)
+        if num_shards <= 0:
+            raise ValueError(
+                "Invalid num_shards %d for test %s" % (num_shards, file_name)
+            )
+        result[file_name] = num_shards
     return result
-  for arg in sharded_tests_arg:
-    [file_name, num_shards_str] = arg.split(":")
-    num_shards = int(num_shards_str)
-    if num_shards <= 0:
-      raise ValueError("Invalid num_shards %d for test %s" %
-                       (num_shards, file_name))
-    result[file_name] = num_shards
-  return result
 
 
 def main(argv):
-  del argv
-  test_to_shards = _ParseShardedTests(FLAGS.sharded_tests)
-  tests = _DiscoverTests(FLAGS.start_dirs, test_to_shards)
-  if not tests:
-    logging.info(
-        "No tests were discovered. Please check that they are setup correctly.")
-    return -2
-  parallelism = FLAGS.parallelism
-  if parallelism is None:
-    parallelism = multiprocessing.cpu_count()
-  logging.info("Parallelism = %d", parallelism)
-  logging.info("Using Python: %s", _GetPython())
-  all_succeeded = _RunTests(tests, parallelism)
-  return 0 if all_succeeded else -1
+    del argv
+    test_to_shards = _ParseShardedTests(FLAGS.sharded_tests)
+    tests = _DiscoverTests(FLAGS.start_dirs, test_to_shards)
+    if not tests:
+        logging.info(
+            "No tests were discovered. Please check that they are setup correctly."
+        )
+        return -2
+    parallelism = FLAGS.parallelism
+    if parallelism is None:
+        parallelism = multiprocessing.cpu_count()
+    logging.info("Parallelism = %d", parallelism)
+    logging.info("Using Python: %s", _GetPython())
+    all_succeeded = _RunTests(tests, parallelism)
+    return 0 if all_succeeded else -1
 
 
 if __name__ == "__main__":
-  flags.mark_flag_as_required("start_dirs")
-  app.run(main)
+    flags.mark_flag_as_required("start_dirs")
+    app.run(main)
diff --git a/tfx_bsl/tfxio/csv_tfxio.py b/tfx_bsl/tfxio/csv_tfxio.py
index 158da8d1..ea223b12 100644
--- a/tfx_bsl/tfxio/csv_tfxio.py
+++ b/tfx_bsl/tfxio/csv_tfxio.py
@@ -19,271 +19,288 @@
 
 import apache_beam as beam
 import pyarrow as pa
-from tfx_bsl.coders import csv_decoder
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import record_based_tfxio
-from tfx_bsl.tfxio import tensor_adapter
-from tfx_bsl.tfxio import tensor_representation_util
-from tfx_bsl.tfxio import tfxio
-
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.coders import csv_decoder
+from tfx_bsl.tfxio import (
+    dataset_options,
+    record_based_tfxio,
+    tensor_adapter,
+    tensor_representation_util,
+    tfxio,
+)
+
 
 class _CsvTFXIOBase(record_based_tfxio.RecordBasedTFXIO):
-  """Base class for TFXIO implementations for CSV."""
-
-  def __init__(self,
-               physical_format: Text,
-               column_names: List[Text],
-               delimiter: Optional[Text] = ",",
-               skip_blank_lines: bool = True,
-               multivalent_columns: Optional[Text] = None,
-               secondary_delimiter: Optional[Text] = None,
-               schema: Optional[schema_pb2.Schema] = None,
-               raw_record_column_name: Optional[Text] = None,
-               telemetry_descriptors: Optional[List[Text]] = None):
-    super().__init__(
-        telemetry_descriptors=telemetry_descriptors,
-        raw_record_column_name=raw_record_column_name,
-        logical_format="csv",
-        physical_format=physical_format)
-    self._schema = schema
-    self._column_names = column_names
-    self._delimiter = delimiter
-    self._skip_blank_lines = skip_blank_lines
-    self._multivalent_columns = multivalent_columns
-    self._secondary_delimiter = secondary_delimiter
-    self._raw_record_column_name = raw_record_column_name
-    if schema is not None:
-      feature_names = [f.name for f in schema.feature]
-      if not set(feature_names).issubset(set(column_names)):
-        raise ValueError(
-            "Schema features are not a subset of column names: {} vs {}".format(
-                column_names, feature_names))
-    self._schema_projected = False
-
-  def SupportAttachingRawRecords(self) -> bool:
-    return True
-
-  @abc.abstractmethod
-  def _CSVSource(self) -> beam.PTransform:
-    """Returns a PTtransform that producese PCollection[bytets]."""
-
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return self._CSVSource()
-
-  def _RawRecordToRecordBatchInternal(self,
-                                      batch_size: Optional[int] = None
-                                     ) -> beam.PTransform:
-
-    @beam.typehints.with_input_types(bytes)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
-      """Returns RecordBatch of csv lines."""
-
-      # Decode raw csv lines to record batches.
-      record_batches = (
-          raw_records_pcoll
-          | "BytesToStr" >> beam.Map(lambda b: b.decode())
-          | "CSVToRecordBatch" >> csv_decoder.CSVToRecordBatch(
-              column_names=self._column_names,
-              delimiter=self._delimiter,
-              skip_blank_lines=self._skip_blank_lines,
-              schema=self._schema,
-              desired_batch_size=batch_size,
-              multivalent_columns=self._multivalent_columns,
-              secondary_delimiter=self._secondary_delimiter,
-              raw_record_column_name=self._raw_record_column_name))
-
-      return record_batches
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
-    if not self._schema:
-      raise ValueError("TFMD schema not provided. Unable to derive an "
-                       "Arrow schema")
-    return csv_decoder.GetArrowSchema(
-        self._column_names,
-        self._schema)
-
-  def _TensorRepresentations(
-      self, merge_inferred) -> tensor_adapter.TensorRepresentations:
-    if merge_inferred:
-      return tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
-          self._schema)
-    result = (
-        tensor_representation_util.GetTensorRepresentationsFromSchema(
-            self._schema))
-    if result is None:
-      result = (
-          tensor_representation_util.InferTensorRepresentationsFromSchema(
-              self._schema))
-    return result
-
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    return self._TensorRepresentations(not self._schema_projected)
-
-  def _ProjectTfmdSchemaTensorRepresentation(
-      self, tensor_names: List[Text]) -> schema_pb2.Schema:
-    """Creates the tensor representation for choosen tensor_names."""
-    tensor_representations = self._TensorRepresentations(False)
-    tensor_names = set(tensor_names)
-
-    # The columns in the schema will remain the same, because the csv decoder
-    # will need to decode all columns no matter what.
-    result = schema_pb2.Schema()
-    result.CopyFrom(self._schema)
-
-    # The tensor representation will only contain the projected columns, so the
-    # output tensors will only be the projected columns.
-    tensor_representation_util.SetTensorRepresentationsInSchema(
-        result,
-        {k: v for k, v in tensor_representations.items() if k in tensor_names})
-
-    return result
-
-  def _ProjectImpl(self, tensor_names: List[Text]) -> tfxio.TFXIO:
-    """Returns a projected TFXIO.
-
-    Project in csv should not be used for optimization, since the decoder must
-    read all columns no matter what.
-
-    The Projected TFXIO will not project the record batches, arrow schema,
-    nor the tfmd schema. Only the tensor representation, and the resulting
-    tensors will be projected.
-
-    Args:
-      tensor_names: The columns to project.
-    """
-    projected_schema = self._ProjectTfmdSchemaTensorRepresentation(tensor_names)
-    result = copy.copy(self)
-    result._schema = projected_schema  # pylint: disable=protected-access
-    result._schema_projected = True  # pylint: disable=protected-access
-    return result
+    """Base class for TFXIO implementations for CSV."""
+
+    def __init__(
+        self,
+        physical_format: str,
+        column_names: List[str],
+        delimiter: Optional[str] = ",",
+        skip_blank_lines: bool = True,
+        multivalent_columns: Optional[str] = None,
+        secondary_delimiter: Optional[str] = None,
+        schema: Optional[schema_pb2.Schema] = None,
+        raw_record_column_name: Optional[str] = None,
+        telemetry_descriptors: Optional[List[str]] = None,
+    ):
+        super().__init__(
+            telemetry_descriptors=telemetry_descriptors,
+            raw_record_column_name=raw_record_column_name,
+            logical_format="csv",
+            physical_format=physical_format,
+        )
+        self._schema = schema
+        self._column_names = column_names
+        self._delimiter = delimiter
+        self._skip_blank_lines = skip_blank_lines
+        self._multivalent_columns = multivalent_columns
+        self._secondary_delimiter = secondary_delimiter
+        self._raw_record_column_name = raw_record_column_name
+        if schema is not None:
+            feature_names = [f.name for f in schema.feature]
+            if not set(feature_names).issubset(set(column_names)):
+                raise ValueError(
+                    f"Schema features are not a subset of column names: {column_names} vs {feature_names}"
+                )
+        self._schema_projected = False
+
+    def SupportAttachingRawRecords(self) -> bool:
+        return True
+
+    @abc.abstractmethod
+    def _CSVSource(self) -> beam.PTransform:
+        """Returns a PTtransform that producese PCollection[bytets]."""
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return self._CSVSource()
+
+    def _RawRecordToRecordBatchInternal(
+        self, batch_size: Optional[int] = None
+    ) -> beam.PTransform:
+        @beam.typehints.with_input_types(bytes)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
+            """Returns RecordBatch of csv lines."""
+            # Decode raw csv lines to record batches.
+            record_batches = (
+                raw_records_pcoll
+                | "BytesToStr" >> beam.Map(lambda b: b.decode())
+                | "CSVToRecordBatch"
+                >> csv_decoder.CSVToRecordBatch(
+                    column_names=self._column_names,
+                    delimiter=self._delimiter,
+                    skip_blank_lines=self._skip_blank_lines,
+                    schema=self._schema,
+                    desired_batch_size=batch_size,
+                    multivalent_columns=self._multivalent_columns,
+                    secondary_delimiter=self._secondary_delimiter,
+                    raw_record_column_name=self._raw_record_column_name,
+                )
+            )
+
+            return record_batches
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
+        if not self._schema:
+            raise ValueError(
+                "TFMD schema not provided. Unable to derive an " "Arrow schema"
+            )
+        return csv_decoder.GetArrowSchema(self._column_names, self._schema)
+
+    def _TensorRepresentations(
+        self, merge_inferred
+    ) -> tensor_adapter.TensorRepresentations:
+        if merge_inferred:
+            return tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
+                self._schema
+            )
+        result = tensor_representation_util.GetTensorRepresentationsFromSchema(
+            self._schema
+        )
+        if result is None:
+            result = tensor_representation_util.InferTensorRepresentationsFromSchema(
+                self._schema
+            )
+        return result
+
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        return self._TensorRepresentations(not self._schema_projected)
+
+    def _ProjectTfmdSchemaTensorRepresentation(
+        self, tensor_names: List[str]
+    ) -> schema_pb2.Schema:
+        """Creates the tensor representation for choosen tensor_names."""
+        tensor_representations = self._TensorRepresentations(False)
+        tensor_names = set(tensor_names)
+
+        # The columns in the schema will remain the same, because the csv decoder
+        # will need to decode all columns no matter what.
+        result = schema_pb2.Schema()
+        result.CopyFrom(self._schema)
+
+        # The tensor representation will only contain the projected columns, so the
+        # output tensors will only be the projected columns.
+        tensor_representation_util.SetTensorRepresentationsInSchema(
+            result,
+            {k: v for k, v in tensor_representations.items() if k in tensor_names},
+        )
+
+        return result
+
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        """Returns a projected TFXIO.
+
+        Project in csv should not be used for optimization, since the decoder must
+        read all columns no matter what.
+
+        The Projected TFXIO will not project the record batches, arrow schema,
+        nor the tfmd schema. Only the tensor representation, and the resulting
+        tensors will be projected.
+
+        Args:
+        ----
+          tensor_names: The columns to project.
+        """
+        projected_schema = self._ProjectTfmdSchemaTensorRepresentation(tensor_names)
+        result = copy.copy(self)
+        result._schema = projected_schema  # pylint: disable=protected-access
+        result._schema_projected = True  # pylint: disable=protected-access
+        return result
 
 
 class BeamRecordCsvTFXIO(_CsvTFXIOBase):
-  """TFXIO implementation for CSV records in pcoll[bytes].
+    """TFXIO implementation for CSV records in pcoll[bytes].
 
-  This is a special TFXIO that does not actually do I/O -- it relies on the
-  caller to prepare a PCollection of bytes.
-  """
+    This is a special TFXIO that does not actually do I/O -- it relies on the
+    caller to prepare a PCollection of bytes.
+    """
 
-  # inherits the initializer from the base.
+    # inherits the initializer from the base.
 
-  def _CSVSource(self) -> beam.PTransform:
-    return (beam.ptransform_fn(lambda x: x)()
+    def _CSVSource(self) -> beam.PTransform:
+        return (
+            beam.ptransform_fn(lambda x: x)()
             .with_input_types(bytes)
-            .with_output_types(bytes))
+            .with_output_types(bytes)
+        )
 
-  def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
-    raise NotImplementedError
+    def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
+        raise NotImplementedError
 
-  def TensorFlowDataset(self,
-                        options: dataset_options.TensorFlowDatasetOptions):
-    raise NotImplementedError
+    def TensorFlowDataset(self, options: dataset_options.TensorFlowDatasetOptions):
+        raise NotImplementedError
 
 
 class CsvTFXIO(_CsvTFXIOBase):
-  """TFXIO implementation for CSV."""
-
-  def __init__(self,
-               file_pattern: Text,
-               column_names: List[Text],
-               telemetry_descriptors: Optional[List[Text]] = None,
-               validate: bool = True,
-               delimiter: Optional[Text] = ",",
-               skip_blank_lines: Optional[bool] = True,
-               multivalent_columns: Optional[Text] = None,
-               secondary_delimiter: Optional[Text] = None,
-               schema: Optional[schema_pb2.Schema] = None,
-               raw_record_column_name: Optional[Text] = None,
-               skip_header_lines: int = 0):
-    """Initializes a CSV TFXIO.
-
-    Args:
-      file_pattern: A file glob pattern to read csv files from.
-      column_names: List of csv column names. Order must match the order in the
-        CSV file.
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-      validate: Boolean flag to verify that the files exist during the pipeline
-        creation time.
-      delimiter: A one-character string used to separate fields.
-      skip_blank_lines: A boolean to indicate whether to skip over blank lines
-        rather than interpreting them as missing values.
-      multivalent_columns: Name of column that can contain multiple values. If
-        secondary_delimiter is provided, this must also be provided.
-      secondary_delimiter: Delimiter used for parsing multivalent columns. If
-        multivalent_columns is provided, this must also be provided.
-      schema: An optional TFMD Schema describing the dataset. If schema is
-        provided, it will determine the data type of the csv columns. Otherwise,
-        the each column's data type will be inferred by the csv decoder. The
-        schema should contain exactly the same features as column_names.
-      raw_record_column_name: If not None, the generated Arrow RecordBatches
-        will contain a column of the given name that contains raw csv rows.
-      skip_header_lines: Number of header lines to skip. Same number is
-        skipped from each file. Must be 0 or higher. Large number of
-        skipped lines might impact performance.
-    """
-    super().__init__(
-        column_names=column_names,
-        delimiter=delimiter,
-        skip_blank_lines=skip_blank_lines,
-        multivalent_columns=multivalent_columns,
-        secondary_delimiter=secondary_delimiter,
-        schema=schema,
-        raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=telemetry_descriptors,
-        physical_format="text")
-    self._file_pattern = file_pattern
-    self._validate = validate
-    self._skip_header_lines = skip_header_lines
-
-  def _CSVSource(self) -> beam.PTransform:
-    """Returns a PTtransform that producese PCollection[bytes]."""
-    return beam.io.ReadFromText(
-        self._file_pattern,
-        coder=beam.coders.BytesCoder(),
-        validate=self._validate,
-        skip_header_lines=self._skip_header_lines)
-
-  def _ProjectImpl(self, tensor_names: List[Text]) -> tfxio.TFXIO:
-    """Returns a projected TFXIO.
-
-    Project in csv should not be used for optimization, since the decoder must
-    read all columns no matter what.
-
-    The Projected TFXIO will not project the record batches, arrow schema,
-    nor the tfmd schema. Only the tensor representation, and the resulting
-    tensors will be projected.
-
-    Args:
-      tensor_names: The columns to project.
-    """
-    projected_schema = self._ProjectTfmdSchemaTensorRepresentation(tensor_names)
-    result = CsvTFXIO(
-        file_pattern=self._file_pattern,
-        column_names=self._column_names,
-        validate=self._validate,
-        delimiter=self._delimiter,
-        skip_blank_lines=self._skip_blank_lines,
-        multivalent_columns=self._multivalent_columns,
-        secondary_delimiter=self._secondary_delimiter,
-        schema=projected_schema,
-        raw_record_column_name=self._raw_record_column_name,
-        telemetry_descriptors=self.telemetry_descriptors,
-        skip_header_lines=self._skip_header_lines)
-    result._schema_projected = True  # pylint: disable=protected-access
-    return result
-
-  def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
-    raise NotImplementedError
-
-  def TensorFlowDataset(self,
-                        options: dataset_options.TensorFlowDatasetOptions):
-    raise NotImplementedError
+    """TFXIO implementation for CSV."""
+
+    def __init__(
+        self,
+        file_pattern: str,
+        column_names: List[str],
+        telemetry_descriptors: Optional[List[str]] = None,
+        validate: bool = True,
+        delimiter: Optional[str] = ",",
+        skip_blank_lines: Optional[bool] = True,
+        multivalent_columns: Optional[str] = None,
+        secondary_delimiter: Optional[str] = None,
+        schema: Optional[schema_pb2.Schema] = None,
+        raw_record_column_name: Optional[str] = None,
+        skip_header_lines: int = 0,
+    ):
+        """Initializes a CSV TFXIO.
+
+        Args:
+        ----
+          file_pattern: A file glob pattern to read csv files from.
+          column_names: List of csv column names. Order must match the order in the
+            CSV file.
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+          validate: Boolean flag to verify that the files exist during the pipeline
+            creation time.
+          delimiter: A one-character string used to separate fields.
+          skip_blank_lines: A boolean to indicate whether to skip over blank lines
+            rather than interpreting them as missing values.
+          multivalent_columns: Name of column that can contain multiple values. If
+            secondary_delimiter is provided, this must also be provided.
+          secondary_delimiter: Delimiter used for parsing multivalent columns. If
+            multivalent_columns is provided, this must also be provided.
+          schema: An optional TFMD Schema describing the dataset. If schema is
+            provided, it will determine the data type of the csv columns. Otherwise,
+            the each column's data type will be inferred by the csv decoder. The
+            schema should contain exactly the same features as column_names.
+          raw_record_column_name: If not None, the generated Arrow RecordBatches
+            will contain a column of the given name that contains raw csv rows.
+          skip_header_lines: Number of header lines to skip. Same number is
+            skipped from each file. Must be 0 or higher. Large number of
+            skipped lines might impact performance.
+        """
+        super().__init__(
+            column_names=column_names,
+            delimiter=delimiter,
+            skip_blank_lines=skip_blank_lines,
+            multivalent_columns=multivalent_columns,
+            secondary_delimiter=secondary_delimiter,
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=telemetry_descriptors,
+            physical_format="text",
+        )
+        self._file_pattern = file_pattern
+        self._validate = validate
+        self._skip_header_lines = skip_header_lines
+
+    def _CSVSource(self) -> beam.PTransform:
+        """Returns a PTtransform that producese PCollection[bytes]."""
+        return beam.io.ReadFromText(
+            self._file_pattern,
+            coder=beam.coders.BytesCoder(),
+            validate=self._validate,
+            skip_header_lines=self._skip_header_lines,
+        )
+
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        """Returns a projected TFXIO.
+
+        Project in csv should not be used for optimization, since the decoder must
+        read all columns no matter what.
+
+        The Projected TFXIO will not project the record batches, arrow schema,
+        nor the tfmd schema. Only the tensor representation, and the resulting
+        tensors will be projected.
+
+        Args:
+        ----
+          tensor_names: The columns to project.
+        """
+        projected_schema = self._ProjectTfmdSchemaTensorRepresentation(tensor_names)
+        result = CsvTFXIO(
+            file_pattern=self._file_pattern,
+            column_names=self._column_names,
+            validate=self._validate,
+            delimiter=self._delimiter,
+            skip_blank_lines=self._skip_blank_lines,
+            multivalent_columns=self._multivalent_columns,
+            secondary_delimiter=self._secondary_delimiter,
+            schema=projected_schema,
+            raw_record_column_name=self._raw_record_column_name,
+            telemetry_descriptors=self.telemetry_descriptors,
+            skip_header_lines=self._skip_header_lines,
+        )
+        result._schema_projected = True  # pylint: disable=protected-access
+        return result
+
+    def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
+        raise NotImplementedError
+
+    def TensorFlowDataset(self, options: dataset_options.TensorFlowDatasetOptions):
+        raise NotImplementedError
diff --git a/tfx_bsl/tfxio/csv_tfxio_test.py b/tfx_bsl/tfxio/csv_tfxio_test.py
index 661eb13b..1b851330 100644
--- a/tfx_bsl/tfxio/csv_tfxio_test.py
+++ b/tfx_bsl/tfxio/csv_tfxio_test.py
@@ -13,21 +13,20 @@
 # limitations under the License.
 """Tests for tfx_bsl.tfxio.csv."""
 
-import pytest
 import os
 
-from absl import flags
 import apache_beam as beam
-from apache_beam.testing import util as beam_testing_util
 import pyarrow as pa
+import pytest
 import tensorflow as tf
-from tfx_bsl.tfxio import csv_tfxio
-from tfx_bsl.tfxio import telemetry_test_util
+from absl import flags
+from absl.testing import absltest, parameterized
+from apache_beam.testing import util as beam_testing_util
 from google.protobuf import text_format
-from absl.testing import absltest
-from absl.testing import parameterized
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.tfxio import csv_tfxio, telemetry_test_util
+
 FLAGS = flags.FLAGS
 
 _COLUMN_NAMES = ["int_feature", "float_feature", "string_feature"]
@@ -58,7 +57,9 @@
       max: 2
     }
   }
-""", schema_pb2.Schema())
+""",
+    schema_pb2.Schema(),
+)
 
 _UNORDERED_SCHEMA = text_format.Parse(
     """
@@ -86,7 +87,9 @@
       max: 2
     }
   }
-""", schema_pb2.Schema())
+""",
+    schema_pb2.Schema(),
+)
 
 _TELEMETRY_DESCRIPTORS = ["Some", "Component"]
 
@@ -123,31 +126,35 @@
           max: 2
         }
       }
-    """, schema_pb2.Schema()))
+    """,
+            schema_pb2.Schema(),
+        ),
+    )
 ]
 
-_EXPECTED_ARROW_SCHEMA = pa.schema([
-    pa.field("int_feature", pa.large_list(pa.int64())),
-    pa.field("float_feature", pa.large_list(pa.float32())),
-    pa.field("string_feature", pa.large_list(pa.large_binary()))
-])
+_EXPECTED_ARROW_SCHEMA = pa.schema(
+    [
+        pa.field("int_feature", pa.large_list(pa.int64())),
+        pa.field("float_feature", pa.large_list(pa.float32())),
+        pa.field("string_feature", pa.large_list(pa.large_binary())),
+    ]
+)
 
 _EXPECTED_COLUMN_VALUES = {
-    "int_feature":
-        pa.array([[1], [2]], type=pa.large_list(pa.int64())),
-    "float_feature":
-        pa.array([[2.0], [3.0]], type=pa.large_list(pa.float32())),
-    "string_feature":
-        pa.array([[b"abc"], [b"xyz"]], type=pa.large_list(pa.large_binary())),
+    "int_feature": pa.array([[1], [2]], type=pa.large_list(pa.int64())),
+    "float_feature": pa.array([[2.0], [3.0]], type=pa.large_list(pa.float32())),
+    "string_feature": pa.array(
+        [[b"abc"], [b"xyz"]], type=pa.large_list(pa.large_binary())
+    ),
 }
 
 
 def _WriteInputs(filename, include_header_line=False):
-  with tf.io.gfile.GFile(filename, "w") as out_file:
-    if include_header_line:
-      out_file.write("HEADER_LINE\n")
-    for row in _ROWS:
-      out_file.write(row)
+    with tf.io.gfile.GFile(filename, "w") as out_file:
+        if include_header_line:
+            out_file.write("HEADER_LINE\n")
+        for row in _ROWS:
+            out_file.write(row)
 
 
 _CSV_TFXIO_IMPL_TEST_CASES = [
@@ -158,266 +165,299 @@ def _WriteInputs(filename, include_header_line=False):
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class CsvRecordTest(parameterized.TestCase):
-
-  @classmethod
-  def setUpClass(cls):
-    super().setUpClass()
-    cls._example_file = os.path.join(FLAGS.test_tmpdir, "csvtexttest",
-                                     "input.csv")
-    tf.io.gfile.makedirs(os.path.dirname(cls._example_file))
-    _WriteInputs(cls._example_file)
-    cls._example_file_with_header_line = os.path.join(
-        FLAGS.test_tmpdir, "csvtexttest", "input_with_header_line.csv")
-    _WriteInputs(cls._example_file_with_header_line,
-                 include_header_line=True)
-
-  def _MakeTFXIO(self, column_names, schema=None, raw_record_column_name=None,
-                 skip_header_lines=0, use_input_with_header_line=False,
-                 make_beam_record_tfxio=False):
-    assert not make_beam_record_tfxio or not use_input_with_header_line, (
-        "Invalid _MakeTFXIO parameter combination")
-    if make_beam_record_tfxio:
-      return csv_tfxio.BeamRecordCsvTFXIO(
-          physical_format=_EXPECTED_PHYSICAL_FORMAT,
-          column_names=column_names,
-          schema=schema,
-          raw_record_column_name=raw_record_column_name,
-          telemetry_descriptors=_TELEMETRY_DESCRIPTORS)
-
-    input_file = (
-        self._example_file_with_header_line
-        if use_input_with_header_line else self._example_file)
-    return csv_tfxio.CsvTFXIO(
-        file_pattern=input_file,
-        column_names=column_names,
-        schema=schema,
-        raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
-        skip_header_lines=skip_header_lines)
-
-  def _MakePipelineInputs(self, pipeline, use_beam_record_csv_tfxio=False):
-    if use_beam_record_csv_tfxio:
-      return pipeline | "CreateCSVLines" >> beam.Create(
-          _RAW_RECORDS, reshuffle=False)
-    return pipeline
-
-  def _ValidateRecordBatch(self,
-                           record_batch,
-                           raw_record_column_name=None):
-    self.assertIsInstance(record_batch, pa.RecordBatch)
-    self.assertEqual(record_batch.num_rows, 2)
-    expected_schema = _EXPECTED_ARROW_SCHEMA
-    if raw_record_column_name is not None:
-      expected_schema = pa.schema(
-          list(expected_schema) +
-          [pa.field(raw_record_column_name, pa.large_list(pa.large_binary()))])
-    self.assertTrue(
-        record_batch.schema.equals(expected_schema),
-        "Expected: {} ; got {}".format(expected_schema,
-                                       record_batch.schema))
-    for i, field in enumerate(record_batch.schema):
-      if field.name == raw_record_column_name:
-        continue
-      self.assertTrue(
-          record_batch.column(i).equals(_EXPECTED_COLUMN_VALUES[field.name]),
-          "Column {} did not match ({} vs {}).".format(
-              field.name, record_batch.column(i),
-              _EXPECTED_COLUMN_VALUES[field.name]))
-
-    if raw_record_column_name is not None:
-      self.assertEqual(record_batch.schema.names[-1], raw_record_column_name)
-      self.assertEqual(record_batch.columns[-1].flatten().to_pylist(),
-                       _RAW_RECORDS)
-
-  @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
-  def testImplicitTensorRepresentations(self, use_beam_record_csv_tfxio):
-    """Tests inferring of tensor representation."""
-    tfxio = self._MakeTFXIO(_COLUMN_NAMES, schema=_SCHEMA,
-                            make_beam_record_tfxio=use_beam_record_csv_tfxio)
-    self.assertEqual(
-        {
-            "int_feature":
-                text_format.Parse(
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls._example_file = os.path.join(FLAGS.test_tmpdir, "csvtexttest", "input.csv")
+        tf.io.gfile.makedirs(os.path.dirname(cls._example_file))
+        _WriteInputs(cls._example_file)
+        cls._example_file_with_header_line = os.path.join(
+            FLAGS.test_tmpdir, "csvtexttest", "input_with_header_line.csv"
+        )
+        _WriteInputs(cls._example_file_with_header_line, include_header_line=True)
+
+    def _MakeTFXIO(
+        self,
+        column_names,
+        schema=None,
+        raw_record_column_name=None,
+        skip_header_lines=0,
+        use_input_with_header_line=False,
+        make_beam_record_tfxio=False,
+    ):
+        assert (
+            not make_beam_record_tfxio or not use_input_with_header_line
+        ), "Invalid _MakeTFXIO parameter combination"
+        if make_beam_record_tfxio:
+            return csv_tfxio.BeamRecordCsvTFXIO(
+                physical_format=_EXPECTED_PHYSICAL_FORMAT,
+                column_names=column_names,
+                schema=schema,
+                raw_record_column_name=raw_record_column_name,
+                telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+            )
+
+        input_file = (
+            self._example_file_with_header_line
+            if use_input_with_header_line
+            else self._example_file
+        )
+        return csv_tfxio.CsvTFXIO(
+            file_pattern=input_file,
+            column_names=column_names,
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+            skip_header_lines=skip_header_lines,
+        )
+
+    def _MakePipelineInputs(self, pipeline, use_beam_record_csv_tfxio=False):
+        if use_beam_record_csv_tfxio:
+            return pipeline | "CreateCSVLines" >> beam.Create(
+                _RAW_RECORDS, reshuffle=False
+            )
+        return pipeline
+
+    def _ValidateRecordBatch(self, record_batch, raw_record_column_name=None):
+        self.assertIsInstance(record_batch, pa.RecordBatch)
+        self.assertEqual(record_batch.num_rows, 2)
+        expected_schema = _EXPECTED_ARROW_SCHEMA
+        if raw_record_column_name is not None:
+            expected_schema = pa.schema(
+                list(expected_schema)
+                + [pa.field(raw_record_column_name, pa.large_list(pa.large_binary()))]
+            )
+        self.assertTrue(
+            record_batch.schema.equals(expected_schema),
+            f"Expected: {expected_schema} ; got {record_batch.schema}",
+        )
+        for i, field in enumerate(record_batch.schema):
+            if field.name == raw_record_column_name:
+                continue
+            self.assertTrue(
+                record_batch.column(i).equals(_EXPECTED_COLUMN_VALUES[field.name]),
+                f"Column {field.name} did not match ({record_batch.column(i)} vs {_EXPECTED_COLUMN_VALUES[field.name]}).",
+            )
+
+        if raw_record_column_name is not None:
+            self.assertEqual(record_batch.schema.names[-1], raw_record_column_name)
+            self.assertEqual(
+                record_batch.columns[-1].flatten().to_pylist(), _RAW_RECORDS
+            )
+
+    @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
+    def testImplicitTensorRepresentations(self, use_beam_record_csv_tfxio):
+        """Tests inferring of tensor representation."""
+        tfxio = self._MakeTFXIO(
+            _COLUMN_NAMES,
+            schema=_SCHEMA,
+            make_beam_record_tfxio=use_beam_record_csv_tfxio,
+        )
+        self.assertEqual(
+            {
+                "int_feature": text_format.Parse(
                     """varlen_sparse_tensor { column_name: "int_feature"}""",
-                    schema_pb2.TensorRepresentation()),
-            "float_feature":
-                text_format.Parse(
+                    schema_pb2.TensorRepresentation(),
+                ),
+                "float_feature": text_format.Parse(
                     """varlen_sparse_tensor { column_name: "float_feature"}""",
-                    schema_pb2.TensorRepresentation()),
-            "string_feature":
-                text_format.Parse(
+                    schema_pb2.TensorRepresentation(),
+                ),
+                "string_feature": text_format.Parse(
                     """varlen_sparse_tensor { column_name: "string_feature" }""",
-                    schema_pb2.TensorRepresentation()),
-        }, tfxio.TensorRepresentations())
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch)
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 3)
-      self.assertIn("int_feature", dict_of_tensors)
-      self.assertIn("float_feature", dict_of_tensors)
-      self.assertIn("string_feature", dict_of_tensors)
-
-    p = beam.Pipeline()
-    record_batch_pcoll = (
-        self._MakePipelineInputs(p, use_beam_record_csv_tfxio)
-        | tfxio.BeamSource(batch_size=len(_ROWS)))
-    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-    pipeline_result = p.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(self, pipeline_result,
-                                        _TELEMETRY_DESCRIPTORS, "csv",
-                                        _EXPECTED_PHYSICAL_FORMAT)
-
-  @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
-  def testExplicitTensorRepresentations(self, use_beam_record_csv_tfxio):
-    """Tests when the tensor representation is explicitely provided in the schema."""
-    schema = schema_pb2.Schema()
-    schema.CopyFrom(_SCHEMA)
-    tensor_representations = {
-        "my_feature":
-            text_format.Parse(
+                    schema_pb2.TensorRepresentation(),
+                ),
+            },
+            tfxio.TensorRepresentations(),
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch)
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 3)
+            self.assertIn("int_feature", dict_of_tensors)
+            self.assertIn("float_feature", dict_of_tensors)
+            self.assertIn("string_feature", dict_of_tensors)
+
+        p = beam.Pipeline()
+        record_batch_pcoll = self._MakePipelineInputs(
+            p, use_beam_record_csv_tfxio
+        ) | tfxio.BeamSource(batch_size=len(_ROWS))
+        beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+        pipeline_result = p.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self,
+            pipeline_result,
+            _TELEMETRY_DESCRIPTORS,
+            "csv",
+            _EXPECTED_PHYSICAL_FORMAT,
+        )
+
+    @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
+    def testExplicitTensorRepresentations(self, use_beam_record_csv_tfxio):
+        """Tests when the tensor representation is explicitely provided in the schema."""
+        schema = schema_pb2.Schema()
+        schema.CopyFrom(_SCHEMA)
+        tensor_representations = {
+            "my_feature": text_format.Parse(
                 """
             dense_tensor {
              column_name: "string_feature"
              shape { dim { size: 1 } }
              default_value { bytes_value: "abc" }
-           }""", schema_pb2.TensorRepresentation())
-    }
-    schema.tensor_representation_group[""].CopyFrom(
-        schema_pb2.TensorRepresentationGroup(
-            tensor_representation=tensor_representations))
-
-    tfxio = self._MakeTFXIO(
-        _COLUMN_NAMES, schema=schema,
-        make_beam_record_tfxio=use_beam_record_csv_tfxio)
-
-    expected_tensor_representations = {
-        "int_feature":
-            text_format.Parse(
+           }""",
+                schema_pb2.TensorRepresentation(),
+            )
+        }
+        schema.tensor_representation_group[""].CopyFrom(
+            schema_pb2.TensorRepresentationGroup(
+                tensor_representation=tensor_representations
+            )
+        )
+
+        tfxio = self._MakeTFXIO(
+            _COLUMN_NAMES,
+            schema=schema,
+            make_beam_record_tfxio=use_beam_record_csv_tfxio,
+        )
+
+        expected_tensor_representations = {
+            "int_feature": text_format.Parse(
                 """varlen_sparse_tensor { column_name: "int_feature"}""",
-                schema_pb2.TensorRepresentation()),
-        "float_feature":
-            text_format.Parse(
+                schema_pb2.TensorRepresentation(),
+            ),
+            "float_feature": text_format.Parse(
                 """varlen_sparse_tensor { column_name: "float_feature"}""",
-                schema_pb2.TensorRepresentation()),
-    }
-    expected_tensor_representations.update(tensor_representations)
-
-    self.assertEqual(expected_tensor_representations,
-                     tfxio.TensorRepresentations())
-
-  @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
-  def testProjection(self, use_beam_record_csv_tfxio):
-    """Test projecting of a TFXIO."""
-    tfxio = self._MakeTFXIO(
-        _COLUMN_NAMES, schema=_SCHEMA,
-        make_beam_record_tfxio=use_beam_record_csv_tfxio)
-
-    projected_tfxio = tfxio.Project(["int_feature"])
-
-    # The projected_tfxio still has original schema
-    self.assertTrue(projected_tfxio.ArrowSchema().equals(
-        _EXPECTED_ARROW_SCHEMA))
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch)
-      expected_schema = projected_tfxio.ArrowSchema()
-      self.assertTrue(
-          record_batch.schema.equals(expected_schema),
-          "actual: {}; expected: {}".format(record_batch.schema,
-                                            expected_schema))
-      tensor_adapter = projected_tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertIn("int_feature", dict_of_tensors)
-      self.assertLen(dict_of_tensors, 1)
-
-    p = beam.Pipeline()
-    record_batch_pcoll = (
-        self._MakePipelineInputs(p, use_beam_record_csv_tfxio)
-        | tfxio.BeamSource(batch_size=len(_ROWS)))
-    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-    pipeline_result = p.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(self, pipeline_result,
-                                        _TELEMETRY_DESCRIPTORS, "csv",
-                                        _EXPECTED_PHYSICAL_FORMAT)
-
-  @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
-  def testAttachRawRecordColumn(self, use_beam_record_csv_tfxio):
-    raw_record_column_name = "raw_records"
-    tfxio = self._MakeTFXIO(
-        _COLUMN_NAMES,
-        schema=_SCHEMA,
-        raw_record_column_name=raw_record_column_name,
-        make_beam_record_tfxio=use_beam_record_csv_tfxio)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, raw_record_column_name)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (
-          self._MakePipelineInputs(p, use_beam_record_csv_tfxio)
-          | tfxio.BeamSource(batch_size=len(_ROWS)))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
-  def testOptionalSchema(self, use_beam_record_csv_tfxio):
-    """Tests when the schema is not provided."""
-    tfxio = self._MakeTFXIO(
-        _COLUMN_NAMES, make_beam_record_tfxio=use_beam_record_csv_tfxio)
-    with self.assertRaisesRegex(ValueError, ".*TFMD schema not provided.*"):
-      tfxio.ArrowSchema()
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (
-          self._MakePipelineInputs(p, use_beam_record_csv_tfxio)
-          | tfxio.BeamSource(batch_size=len(_ROWS)))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
-  def testUnorderedSchema(self, use_beam_record_csv_tfxio):
-    """Tests various valid schemas."""
-    tfxio = self._MakeTFXIO(_COLUMN_NAMES, schema=_UNORDERED_SCHEMA,
-                            make_beam_record_tfxio=use_beam_record_csv_tfxio)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (
-          self._MakePipelineInputs(p, use_beam_record_csv_tfxio)
-          | tfxio.BeamSource(batch_size=len(_ROWS)))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testSkipHeaderLines(self):
-    tfxio = self._MakeTFXIO(_COLUMN_NAMES,
-                            use_input_with_header_line=True,
-                            skip_header_lines=1)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_ROWS))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+                schema_pb2.TensorRepresentation(),
+            ),
+        }
+        expected_tensor_representations.update(tensor_representations)
+
+        self.assertEqual(expected_tensor_representations, tfxio.TensorRepresentations())
+
+    @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
+    def testProjection(self, use_beam_record_csv_tfxio):
+        """Test projecting of a TFXIO."""
+        tfxio = self._MakeTFXIO(
+            _COLUMN_NAMES,
+            schema=_SCHEMA,
+            make_beam_record_tfxio=use_beam_record_csv_tfxio,
+        )
+
+        projected_tfxio = tfxio.Project(["int_feature"])
+
+        # The projected_tfxio still has original schema
+        self.assertTrue(projected_tfxio.ArrowSchema().equals(_EXPECTED_ARROW_SCHEMA))
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch)
+            expected_schema = projected_tfxio.ArrowSchema()
+            self.assertTrue(
+                record_batch.schema.equals(expected_schema),
+                f"actual: {record_batch.schema}; expected: {expected_schema}",
+            )
+            tensor_adapter = projected_tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertIn("int_feature", dict_of_tensors)
+            self.assertLen(dict_of_tensors, 1)
+
+        p = beam.Pipeline()
+        record_batch_pcoll = self._MakePipelineInputs(
+            p, use_beam_record_csv_tfxio
+        ) | tfxio.BeamSource(batch_size=len(_ROWS))
+        beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+        pipeline_result = p.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self,
+            pipeline_result,
+            _TELEMETRY_DESCRIPTORS,
+            "csv",
+            _EXPECTED_PHYSICAL_FORMAT,
+        )
+
+    @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
+    def testAttachRawRecordColumn(self, use_beam_record_csv_tfxio):
+        raw_record_column_name = "raw_records"
+        tfxio = self._MakeTFXIO(
+            _COLUMN_NAMES,
+            schema=_SCHEMA,
+            raw_record_column_name=raw_record_column_name,
+            make_beam_record_tfxio=use_beam_record_csv_tfxio,
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, raw_record_column_name)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = self._MakePipelineInputs(
+                p, use_beam_record_csv_tfxio
+            ) | tfxio.BeamSource(batch_size=len(_ROWS))
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
+    def testOptionalSchema(self, use_beam_record_csv_tfxio):
+        """Tests when the schema is not provided."""
+        tfxio = self._MakeTFXIO(
+            _COLUMN_NAMES, make_beam_record_tfxio=use_beam_record_csv_tfxio
+        )
+        with self.assertRaisesRegex(ValueError, ".*TFMD schema not provided.*"):
+            tfxio.ArrowSchema()
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = self._MakePipelineInputs(
+                p, use_beam_record_csv_tfxio
+            ) | tfxio.BeamSource(batch_size=len(_ROWS))
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    @parameterized.named_parameters(*_CSV_TFXIO_IMPL_TEST_CASES)
+    def testUnorderedSchema(self, use_beam_record_csv_tfxio):
+        """Tests various valid schemas."""
+        tfxio = self._MakeTFXIO(
+            _COLUMN_NAMES,
+            schema=_UNORDERED_SCHEMA,
+            make_beam_record_tfxio=use_beam_record_csv_tfxio,
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = self._MakePipelineInputs(
+                p, use_beam_record_csv_tfxio
+            ) | tfxio.BeamSource(batch_size=len(_ROWS))
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testSkipHeaderLines(self):
+        tfxio = self._MakeTFXIO(
+            _COLUMN_NAMES, use_input_with_header_line=True, skip_header_lines=1
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_ROWS))
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tfxio/dataset_options.py b/tfx_bsl/tfxio/dataset_options.py
index 7f82e82b..10e13415 100644
--- a/tfx_bsl/tfxio/dataset_options.py
+++ b/tfx_bsl/tfxio/dataset_options.py
@@ -14,124 +14,157 @@
 """Dataset options for tfxio."""
 
 from typing import NamedTuple, Optional
+
 import tensorflow as tf
 
 
 class TensorFlowDatasetOptions(
-    NamedTuple('TensorFlowDatasetOptions', [
-        ('batch_size', int),
-        ('drop_final_batch', bool),
-        ('num_epochs', Optional[int]),
-        ('shuffle', bool),
-        ('shuffle_buffer_size', int),
-        ('shuffle_seed', Optional[int]),
-        ('prefetch_buffer_size', int),
-        ('reader_num_threads', int),
-        ('parser_num_threads', int),
-        ('sloppy_ordering', bool),
-        ('label_key', Optional[str]),
-    ])):
-  """Options for TFXIO's TensorFlowDataset.
+    NamedTuple(
+        "TensorFlowDatasetOptions",
+        [
+            ("batch_size", int),
+            ("drop_final_batch", bool),
+            ("num_epochs", Optional[int]),
+            ("shuffle", bool),
+            ("shuffle_buffer_size", int),
+            ("shuffle_seed", Optional[int]),
+            ("prefetch_buffer_size", int),
+            ("reader_num_threads", int),
+            ("parser_num_threads", int),
+            ("sloppy_ordering", bool),
+            ("label_key", Optional[str]),
+        ],
+    )
+):
+    """Options for TFXIO's TensorFlowDataset.
 
-  Note: not all of these options may be effective. It depends on the particular
-  TFXIO's implementation.
-  """
+    Note: not all of these options may be effective. It depends on the particular
+    TFXIO's implementation.
+    """
 
-  def __new__(cls,
-              batch_size: int,
-              drop_final_batch: bool = False,
-              num_epochs: Optional[int] = None,
-              shuffle: bool = True,
-              shuffle_buffer_size: int = 10000,
-              shuffle_seed: Optional[int] = None,
-              prefetch_buffer_size: int = tf.data.experimental.AUTOTUNE,
-              reader_num_threads: int = tf.data.experimental.AUTOTUNE,
-              parser_num_threads: int = tf.data.experimental.AUTOTUNE,
-              sloppy_ordering: bool = False,
-              label_key: Optional[str] = None):
-    """Returns a dataset options object.
+    def __new__(
+        cls,
+        batch_size: int,
+        drop_final_batch: bool = False,
+        num_epochs: Optional[int] = None,
+        shuffle: bool = True,
+        shuffle_buffer_size: int = 10000,
+        shuffle_seed: Optional[int] = None,
+        prefetch_buffer_size: int = tf.data.experimental.AUTOTUNE,
+        reader_num_threads: int = tf.data.experimental.AUTOTUNE,
+        parser_num_threads: int = tf.data.experimental.AUTOTUNE,
+        sloppy_ordering: bool = False,
+        label_key: Optional[str] = None,
+    ):
+        """Returns a dataset options object.
 
-    Args:
-      batch_size: An int representing the number of records to combine in a
-        single batch.
-      drop_final_batch: If `True`, and the batch size does not evenly divide the
-        input dataset size, the final smaller batch will be dropped. Defaults to
-        `False`.
-      num_epochs: Integer specifying the number of times to read through the
-        dataset. If None, cycles through the dataset forever. Defaults to
-        `None`.
-      shuffle: A boolean, indicates whether the input should be shuffled.
-        Defaults to `True`.
-      shuffle_buffer_size: Buffer size of the items to shuffle. The size is the
-        number of items (i.e. records for a record based TFXIO) to hold. Only
-        data read into the buffer will be shuffled (there is no shuffling across
-        buffers). A large capacity ensures better shuffling but would increase
-        memory usage and startup time.
-      shuffle_seed: Randomization seed to use for shuffling.
-      prefetch_buffer_size: Number of feature batches to prefetch in order to
-        improve performance. Recommended value is the number of batches consumed
-        per training step. Defaults to auto-tune.
-      reader_num_threads: Number of threads used to read records. If >1, the
-        results will be interleaved. Defaults to tf.data.experimental.AUTOTUNE.
-      parser_num_threads: Number of threads to use for parsing `Example` tensors
-        into a dictionary of `Feature` tensors (if applicable). Defaults to
-        auto-tune.
-      sloppy_ordering: If `True`, reading performance will be improved at the
-        cost of non-deterministic ordering. If `False`, the order of elements
-        produced is deterministic prior to shuffling (elements are still
-        randomized if `shuffle=True`. Note that if the seed is set, then order
-        of elements after shuffling is deterministic). Defaults to False.
-      label_key: name of the label tensor. If provided, the returned dataset
-        will yield Tuple[Dict[str, Tensor], Tensor], where the second term in
-        the tuple is the label tensor and the dict (the first term) will not
-        contain the label feature.
-    """
-    return super().__new__(cls, batch_size, drop_final_batch, num_epochs,
-                           shuffle, shuffle_buffer_size, shuffle_seed,
-                           prefetch_buffer_size, reader_num_threads,
-                           parser_num_threads, sloppy_ordering, label_key)
+        Args:
+        ----
+          batch_size: An int representing the number of records to combine in a
+            single batch.
+          drop_final_batch: If `True`, and the batch size does not evenly divide the
+            input dataset size, the final smaller batch will be dropped. Defaults to
+            `False`.
+          num_epochs: Integer specifying the number of times to read through the
+            dataset. If None, cycles through the dataset forever. Defaults to
+            `None`.
+          shuffle: A boolean, indicates whether the input should be shuffled.
+            Defaults to `True`.
+          shuffle_buffer_size: Buffer size of the items to shuffle. The size is the
+            number of items (i.e. records for a record based TFXIO) to hold. Only
+            data read into the buffer will be shuffled (there is no shuffling across
+            buffers). A large capacity ensures better shuffling but would increase
+            memory usage and startup time.
+          shuffle_seed: Randomization seed to use for shuffling.
+          prefetch_buffer_size: Number of feature batches to prefetch in order to
+            improve performance. Recommended value is the number of batches consumed
+            per training step. Defaults to auto-tune.
+          reader_num_threads: Number of threads used to read records. If >1, the
+            results will be interleaved. Defaults to tf.data.experimental.AUTOTUNE.
+          parser_num_threads: Number of threads to use for parsing `Example` tensors
+            into a dictionary of `Feature` tensors (if applicable). Defaults to
+            auto-tune.
+          sloppy_ordering: If `True`, reading performance will be improved at the
+            cost of non-deterministic ordering. If `False`, the order of elements
+            produced is deterministic prior to shuffling (elements are still
+            randomized if `shuffle=True`. Note that if the seed is set, then order
+            of elements after shuffling is deterministic). Defaults to False.
+          label_key: name of the label tensor. If provided, the returned dataset
+            will yield Tuple[Dict[str, Tensor], Tensor], where the second term in
+            the tuple is the label tensor and the dict (the first term) will not
+            contain the label feature.
+        """
+        return super().__new__(
+            cls,
+            batch_size,
+            drop_final_batch,
+            num_epochs,
+            shuffle,
+            shuffle_buffer_size,
+            shuffle_seed,
+            prefetch_buffer_size,
+            reader_num_threads,
+            parser_num_threads,
+            sloppy_ordering,
+            label_key,
+        )
 
 
 class RecordBatchesOptions(
-    NamedTuple('RecordBatchesOptions', [('batch_size', int),
-                                        ('drop_final_batch', bool),
-                                        ('num_epochs', Optional[int]),
-                                        ('shuffle', bool),
-                                        ('shuffle_buffer_size', int),
-                                        ('shuffle_seed', Optional[int])])):
-  """Options for TFXIO's RecordBatches.
+    NamedTuple(
+        "RecordBatchesOptions",
+        [
+            ("batch_size", int),
+            ("drop_final_batch", bool),
+            ("num_epochs", Optional[int]),
+            ("shuffle", bool),
+            ("shuffle_buffer_size", int),
+            ("shuffle_seed", Optional[int]),
+        ],
+    )
+):
+    """Options for TFXIO's RecordBatches.
 
-  Note: not all of these options may be effective. It depends on the particular
-  TFXIO's implementation.
-  """
+    Note: not all of these options may be effective. It depends on the particular
+    TFXIO's implementation.
+    """
 
-  def __new__(cls,
-              batch_size: int,
-              drop_final_batch: bool = False,
-              num_epochs: Optional[int] = None,
-              shuffle: bool = True,
-              shuffle_buffer_size: int = 10000,
-              shuffle_seed: Optional[int] = None):
-    """Returns a dataset options object.
+    def __new__(
+        cls,
+        batch_size: int,
+        drop_final_batch: bool = False,
+        num_epochs: Optional[int] = None,
+        shuffle: bool = True,
+        shuffle_buffer_size: int = 10000,
+        shuffle_seed: Optional[int] = None,
+    ):
+        """Returns a dataset options object.
 
-    Args:
-      batch_size: An int representing the number of records to combine in a
-        single batch.
-      drop_final_batch: If `True`, and the batch size does not evenly divide the
-        input dataset size, the final smaller batch will be dropped. Defaults to
-        `False`.
-      num_epochs: Integer specifying the number of times to read through the
-        dataset. If None, cycles through the dataset forever. Defaults to
-        `None`.
-      shuffle: A boolean, indicates whether the input should be shuffled.
-        Defaults to `True`.
-      shuffle_buffer_size: Buffer size of the items to shuffle. The size is the
-        number of items (i.e. records for a record based TFXIO) to hold. Only
-        data read into the buffer will be shuffled (there is no shuffling across
-        buffers). A large capacity ensures better shuffling but would increase
-        memory usage and startup time.
-      shuffle_seed: Randomization seed to use for shuffling.
-    """
-    return super(RecordBatchesOptions,
-                 cls).__new__(cls, batch_size, drop_final_batch, num_epochs,
-                              shuffle, shuffle_buffer_size, shuffle_seed)
+        Args:
+        ----
+          batch_size: An int representing the number of records to combine in a
+            single batch.
+          drop_final_batch: If `True`, and the batch size does not evenly divide the
+            input dataset size, the final smaller batch will be dropped. Defaults to
+            `False`.
+          num_epochs: Integer specifying the number of times to read through the
+            dataset. If None, cycles through the dataset forever. Defaults to
+            `None`.
+          shuffle: A boolean, indicates whether the input should be shuffled.
+            Defaults to `True`.
+          shuffle_buffer_size: Buffer size of the items to shuffle. The size is the
+            number of items (i.e. records for a record based TFXIO) to hold. Only
+            data read into the buffer will be shuffled (there is no shuffling across
+            buffers). A large capacity ensures better shuffling but would increase
+            memory usage and startup time.
+          shuffle_seed: Randomization seed to use for shuffling.
+        """
+        return super(RecordBatchesOptions, cls).__new__(
+            cls,
+            batch_size,
+            drop_final_batch,
+            num_epochs,
+            shuffle,
+            shuffle_buffer_size,
+            shuffle_seed,
+        )
diff --git a/tfx_bsl/tfxio/dataset_tfxio.py b/tfx_bsl/tfxio/dataset_tfxio.py
index 3cb9be12..2f31f3e4 100644
--- a/tfx_bsl/tfxio/dataset_tfxio.py
+++ b/tfx_bsl/tfxio/dataset_tfxio.py
@@ -22,161 +22,163 @@
 import apache_beam as beam
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import tensor_adapter
-from tfx_bsl.tfxio import tensor_to_arrow
-from tfx_bsl.tfxio import tfxio
 
+from tfx_bsl.tfxio import dataset_options, tensor_adapter, tensor_to_arrow, tfxio
 
 SpecType = Union[
     tf.TensorSpec,
     tf.RaggedTensorSpec,
     tf.SparseTensorSpec,
-    Tuple['SpecType'],
+    Tuple["SpecType"],
     NamedTuple,
-    Dict[str, 'SpecType'],
-    OrderedDict[str, 'SpecType'],
+    Dict[str, "SpecType"],
+    OrderedDict[str, "SpecType"],
 ]
 
 
 def _CanonicalType(dtype: tf.dtypes.DType) -> tf.dtypes.DType:
-  """Returns TFXIO-canonical version of the given type."""
-  if dtype.is_floating:
-    return tf.float32
-  elif dtype.is_integer or dtype.is_bool:
-    return tf.int64
-  elif dtype in (tf.string, bytes):
-    return tf.string
-  else:
-    raise TypeError(
-        f'Got {dtype}. Only tf.uint8/16/32, tf.int8/16/32/64, tf.float16/32 and'
-        ' bytes/tf.string supported.'
-    )
+    """Returns TFXIO-canonical version of the given type."""
+    if dtype.is_floating:
+        return tf.float32
+    elif dtype.is_integer or dtype.is_bool:
+        return tf.int64
+    elif dtype in (tf.string, bytes):
+        return tf.string
+    else:
+        raise TypeError(
+            f"Got {dtype}. Only tf.uint8/16/32, tf.int8/16/32/64, tf.float16/32 and"
+            " bytes/tf.string supported."
+        )
 
 
 def _IsDict(element: SpecType) -> bool:
-  return isinstance(element, (dict, collections.OrderedDict))
+    return isinstance(element, (dict, collections.OrderedDict))
 
 
 def _IsNamedTuple(element: SpecType) -> bool:
-  return hasattr(element, '_fields')
-
-
-def _GetFeatureNames(
-    spec: SpecType, new_feature_index: int = 0
-) -> List[str]:
-  """Recursively generates feature names for given Dataset Structure.
-
-  Args:
-    spec: Dataset Structure
-    new_feature_index: New feature index
-
-  Returns:
-    List of feature names
-  """
-
-  name_gen = 'feature'
-  feature_names = []
-  if _IsDict(spec):
-    # If the spec is a OrderedDictionary/Dictionary
-    # 1. Iterate over (feature_name, <TensorLike-Spec>) pairs
-    # 2. Check the structure of each spec.
-    # 2.a If the spec is a nested structure, recursively retrieve nested
-    # feature names and append to features list as '<Outer>_<NestedFeature>'.
-    # 2.b If the spec is not nested, append the feature_name to return values.
-
-    for feature_name, tensor_spec in spec.items():  # pytype: disable=attribute-error
-      if not tf.nest.is_nested(tensor_spec):
-        feature_names.append(feature_name)
-
-      elif _IsDict(tensor_spec) or _IsNamedTuple(tensor_spec):
-        if _IsNamedTuple(tensor_spec):
-          tensor_spec = tensor_spec._asdict()
-        feature_names.extend(
-            [
-                feature_name + '_' + nested_feature_name
-                for nested_feature_name in _GetFeatureNames(
-                    tensor_spec, new_feature_index
-                )
-            ]
-        )
+    return hasattr(element, "_fields")
 
-  # If the spec is a NamedTuple, converts it to dictionary, and process
-  # as a Dict.
-  elif _IsNamedTuple(spec):
-    spec = spec._asdict()  # pytype: disable=attribute-error
-    feature_names.extend(_GetFeatureNames(spec, new_feature_index))
 
-  # If the spec is a regular tuple, iterate and branch out if a nested
-  # structure is found.
-  elif isinstance(spec, tuple):
-    for single_spec in spec:
-      if not tf.nest.is_nested(single_spec):
-        feature_names.append(''.join([name_gen, str(new_feature_index)]))
-        new_feature_index += 1
-      else:
-        feature_names.extend(_GetFeatureNames(single_spec, new_feature_index))
+def _GetFeatureNames(spec: SpecType, new_feature_index: int = 0) -> List[str]:
+    """Recursively generates feature names for given Dataset Structure.
 
-  # If spec is not nested, and is a standalone TensorSpec with no feature name
-  # new feature name is generated.
-  else:
-    feature_names.append(''.join([name_gen, str(new_feature_index)]))
+    Args:
+    ----
+      spec: Dataset Structure
+      new_feature_index: New feature index
 
-  return feature_names
+    Returns:
+    -------
+      List of feature names
+    """
+    name_gen = "feature"
+    feature_names = []
+    if _IsDict(spec):
+        # If the spec is a OrderedDictionary/Dictionary
+        # 1. Iterate over (feature_name, <TensorLike-Spec>) pairs
+        # 2. Check the structure of each spec.
+        # 2.a If the spec is a nested structure, recursively retrieve nested
+        # feature names and append to features list as '<Outer>_<NestedFeature>'.
+        # 2.b If the spec is not nested, append the feature_name to return values.
+
+        for (
+            feature_name,
+            tensor_spec,
+        ) in spec.items():  # pytype: disable=attribute-error
+            if not tf.nest.is_nested(tensor_spec):
+                feature_names.append(feature_name)
+
+            elif _IsDict(tensor_spec) or _IsNamedTuple(tensor_spec):
+                if _IsNamedTuple(tensor_spec):
+                    tensor_spec = tensor_spec._asdict()
+                feature_names.extend(
+                    [
+                        feature_name + "_" + nested_feature_name
+                        for nested_feature_name in _GetFeatureNames(
+                            tensor_spec, new_feature_index
+                        )
+                    ]
+                )
+
+    # If the spec is a NamedTuple, converts it to dictionary, and process
+    # as a Dict.
+    elif _IsNamedTuple(spec):
+        spec = spec._asdict()  # pytype: disable=attribute-error
+        feature_names.extend(_GetFeatureNames(spec, new_feature_index))
+
+    # If the spec is a regular tuple, iterate and branch out if a nested
+    # structure is found.
+    elif isinstance(spec, tuple):
+        for single_spec in spec:
+            if not tf.nest.is_nested(single_spec):
+                feature_names.append("".join([name_gen, str(new_feature_index)]))
+                new_feature_index += 1
+            else:
+                feature_names.extend(_GetFeatureNames(single_spec, new_feature_index))
+
+    # If spec is not nested, and is a standalone TensorSpec with no feature name
+    # new feature name is generated.
+    else:
+        feature_names.append("".join([name_gen, str(new_feature_index)]))
+
+    return feature_names
 
 
 def _GetDictStructureForElementSpec(
     *spec: SpecType, feature_names: Optional[List[str]] = None
 ) -> OrderedDict[str, SpecType]:
-  """Creates a flattened Dictionary-like Structure of given Dataset Structure.
-
-  Args:
-    *spec: Element spec for the dataset. This is used as *arg, since it can be a
-      tuple and is unpacked if not used as such.
-    feature_names: (kwarg) Feature names for columns in Dataset.
+    """Creates a flattened Dictionary-like Structure of given Dataset Structure.
 
-  Returns:
-    OrderedDict Structure.
-  """
-  original_spec = spec[0]
+    Args:
+    ----
+      *spec: Element spec for the dataset. This is used as *arg, since it can be a
+        tuple and is unpacked if not used as such.
+      feature_names: (kwarg) Feature names for columns in Dataset.
+
+    Returns:
+    -------
+      OrderedDict Structure.
+    """
+    original_spec = spec[0]
 
-  # Flattening the element_spec creates a list of Tensor Specs
-  flattened_spec = tf.nest.flatten(original_spec)
+    # Flattening the element_spec creates a list of Tensor Specs
+    flattened_spec = tf.nest.flatten(original_spec)
 
-  if not feature_names or (len(flattened_spec) != len(feature_names)):
-    feature_names = _GetFeatureNames(original_spec)
+    if not feature_names or (len(flattened_spec) != len(feature_names)):
+        feature_names = _GetFeatureNames(original_spec)
 
-  return collections.OrderedDict(zip(feature_names, flattened_spec))
+    return collections.OrderedDict(zip(feature_names, flattened_spec))
 
 
 def _PrepareDataset(
     dataset: tf.data.Dataset, feature_names: Optional[List[str]]
 ) -> tf.data.Dataset:
-  """Prepare tf.data.Dataset by modifying structure and casting to supporting dtypes.
-
-  Args:
-    dataset: A tf.data.Dataset having any structure of <tuple, namedtuple, dict,
-      OrderedDict>.
-    feature_names: Optional list of feature_names for flattened features in the
-      Dataset.
+    """Prepare tf.data.Dataset by modifying structure and casting to supporting dtypes.
 
-  Returns:
-    A modified tf.data.Dataset with flattened OrderedDict structure and TFXIO
-    supported dtypes.
-  """
-
-  dict_structure = _GetDictStructureForElementSpec(
-      dataset.element_spec, feature_names=feature_names
-  )
+    Args:
+    ----
+      dataset: A tf.data.Dataset having any structure of <tuple, namedtuple, dict,
+        OrderedDict>.
+      feature_names: Optional list of feature_names for flattened features in the
+        Dataset.
+
+    Returns:
+    -------
+      A modified tf.data.Dataset with flattened OrderedDict structure and TFXIO
+      supported dtypes.
+    """
+    dict_structure = _GetDictStructureForElementSpec(
+        dataset.element_spec, feature_names=feature_names
+    )
 
-  def _UpdateStructureAndCastDtypes(*x):
-    x = tf.nest.flatten(x)
-    x = tf.nest.pack_sequence_as(dict_structure, x)
-    for k, v in x.items():
-      x[k] = tf.cast(v, _CanonicalType(v.dtype))
-    return x
+    def _UpdateStructureAndCastDtypes(*x):
+        x = tf.nest.flatten(x)
+        x = tf.nest.pack_sequence_as(dict_structure, x)
+        for k, v in x.items():
+            x[k] = tf.cast(v, _CanonicalType(v.dtype))
+        return x
 
-  return dataset.map(_UpdateStructureAndCastDtypes)
+    return dataset.map(_UpdateStructureAndCastDtypes)
 
 
 def _LoadDatasetAsRecordBatch(
@@ -185,148 +187,149 @@ def _LoadDatasetAsRecordBatch(
     feature_names: Optional[List[str]],
     use_custom_reader: bool,
 ) -> Iterator[pa.RecordBatch]:
-  """Yields RecordBatches from a single shard.
+    """Yields RecordBatches from a single shard.
 
-  Args:
-    shard: Tuple of shard index and saved dataset path.
-    converter: TensorsToRecordBatchConverter Object.
-    feature_names: Optional list of feature names.
-    use_custom_reader: Flag to specify the shard reading method.
-
-  Yields:
-    Yields RecordBatches.
-  """
-  shard_num, path = shard
+    Args:
+    ----
+      shard: Tuple of shard index and saved dataset path.
+      converter: TensorsToRecordBatchConverter Object.
+      feature_names: Optional list of feature names.
+      use_custom_reader: Flag to specify the shard reading method.
+
+    Yields:
+    ------
+      Yields RecordBatches.
+    """
+    shard_num, path = shard
 
-  def _ReaderFunc(datasets):
-    return datasets.skip(shard_num).take(1).get_single_element()
+    def _ReaderFunc(datasets):
+        return datasets.skip(shard_num).take(1).get_single_element()
 
-  if use_custom_reader:
-    dataset = tf.data.Dataset.load(path, reader_func=_ReaderFunc)
-  else:
-    dataset = tf.data.Dataset.load(path)
+    if use_custom_reader:
+        dataset = tf.data.Dataset.load(path, reader_func=_ReaderFunc)
+    else:
+        dataset = tf.data.Dataset.load(path)
 
-  dataset = _PrepareDataset(dataset, feature_names=feature_names).prefetch(
-      tf.data.AUTOTUNE
-  )
+    dataset = _PrepareDataset(dataset, feature_names=feature_names).prefetch(
+        tf.data.AUTOTUNE
+    )
 
-  # Handles reading empty shards.
-  try:
-    for data in dataset:
-      yield converter.convert(data)
-  except (tf.errors.InvalidArgumentError, tf.errors.OutOfRangeError):
-    pass
+    # Handles reading empty shards.
+    try:
+        for data in dataset:
+            yield converter.convert(data)
+    except (tf.errors.InvalidArgumentError, tf.errors.OutOfRangeError):
+        pass
 
 
 class DatasetTFXIOOptions(NamedTuple):
-  """Options for DatasetTFXIO.
-
-  working_dir: A directory to write the intermediate materialized dataset. It is
-    expected to be remote accessible, since the execution is transferred to Beam
-    runners during BeamSource API call.
-  feature_names: List of feature names for columns/attributes. Expected to have
-    features name for all the features in the flattened feature_spec.
-    If None: Creates default feature names for non-named features and utilizes
-      existing feature names from named features (Dict, NamedTuples).
-    If Partial, creates default feature names for all the features.
-  num_shards: Number of shards to write the materialized dataset. Uses default
-    tf.data sharding if None. Check tf.data.Dataset.save documentation for more
-    details. [https://www.tensorflow.org/api_docs/python/tf/data/Dataset#save]
-  """
-
-  working_dir: str = tempfile.mkdtemp()
-  feature_names: Optional[List[str]] = None
-  num_shards: Optional[int] = None
+    """Options for DatasetTFXIO.
+
+    working_dir: A directory to write the intermediate materialized dataset. It is
+      expected to be remote accessible, since the execution is transferred to Beam
+      runners during BeamSource API call.
+    feature_names: List of feature names for columns/attributes. Expected to have
+      features name for all the features in the flattened feature_spec.
+      If None: Creates default feature names for non-named features and utilizes
+        existing feature names from named features (Dict, NamedTuples).
+      If Partial, creates default feature names for all the features.
+    num_shards: Number of shards to write the materialized dataset. Uses default
+      tf.data sharding if None. Check tf.data.Dataset.save documentation for more
+      details. [https://www.tensorflow.org/api_docs/python/tf/data/Dataset#save]
+    """
+
+    working_dir: str = tempfile.mkdtemp()
+    feature_names: Optional[List[str]] = None
+    num_shards: Optional[int] = None
 
 
 class DatasetTFXIO(tfxio.TFXIO):
-  """TFXIO implementation for tf.data.Dataset sources."""
+    """TFXIO implementation for tf.data.Dataset sources."""
+
+    def __init__(
+        self,
+        dataset: tf.data.Dataset,
+        options: DatasetTFXIOOptions = DatasetTFXIOOptions(),
+    ):
+        """Initializes DatasetTFXIO.
+
+        Args:
+        ----
+          dataset: A batched, finite tf.data.Dataset
+          options: DatasetTFXIOOptions Object, providing working directory, feature
+            names and number of shards for intermediate materialization options.
+        """
+        self._dataset = dataset
+        self._options = options
+        self._use_custom_sharding = bool(self._options.num_shards)
+
+        # Below we retrieve type_specs for the prepared dataset, which are used to
+        # create TensorsToRecordBatchConverter object. Since, it requires
+        # preparing the dataset, we initially modify only a single element (faster).
+        # The entire dataset is prepared in distributed manner on the beam runners
+        # during distributed read.
+        self._type_specs = _PrepareDataset(
+            self._dataset.take(1), feature_names=self._options.feature_names
+        ).element_spec
+        self._converter = tensor_to_arrow.TensorsToRecordBatchConverter(
+            self._type_specs
+        )
+        self._file_pattern = os.path.join(self._options.working_dir, "saved_dataset")
 
-  def __init__(
-      self,
-      dataset: tf.data.Dataset,
-      options: DatasetTFXIOOptions = DatasetTFXIOOptions(),
-  ):
-    """Initializes DatasetTFXIO.
+    def _SaveDataset(self, batch_size: Optional[int] = None):
+        def _CustomShardFunc(*unused_args) -> tf.Tensor:
+            if self._options.num_shards == 1:
+                return tf.constant(0, dtype=tf.int64)
+            return tf.random.uniform(
+                shape=(), maxval=self._options.num_shards, dtype=tf.int64
+            )
 
-    Args:
-      dataset: A batched, finite tf.data.Dataset
-      options: DatasetTFXIOOptions Object, providing working directory, feature
-        names and number of shards for intermediate materialization options.
-    """
-    self._dataset = dataset
-    self._options = options
-    self._use_custom_sharding = bool(self._options.num_shards)
-
-    # Below we retrieve type_specs for the prepared dataset, which are used to
-    # create TensorsToRecordBatchConverter object. Since, it requires
-    # preparing the dataset, we initially modify only a single element (faster).
-    # The entire dataset is prepared in distributed manner on the beam runners
-    # during distributed read.
-    self._type_specs = _PrepareDataset(
-        self._dataset.take(1), feature_names=self._options.feature_names
-    ).element_spec
-    self._converter = tensor_to_arrow.TensorsToRecordBatchConverter(
-        self._type_specs
-    )
-    self._file_pattern = os.path.join(
-        self._options.working_dir, 'saved_dataset'
-    )
+        if batch_size:
+            self._dataset = self._dataset.rebatch(batch_size)
 
-  def _SaveDataset(self, batch_size: Optional[int] = None):
-    def _CustomShardFunc(*unused_args) -> tf.Tensor:
-      if self._options.num_shards == 1:
-        return tf.constant(0, dtype=tf.int64)
-      return tf.random.uniform(
-          shape=(), maxval=self._options.num_shards, dtype=tf.int64
-      )
+        if self._use_custom_sharding:
+            self._dataset.save(self._file_pattern, shard_func=_CustomShardFunc)
+        else:
+            self._dataset.save(self._file_pattern)
 
-    if batch_size:
-      self._dataset = self._dataset.rebatch(batch_size)
+    def ArrowSchema(self) -> pa.Schema:
+        return self._converter.arrow_schema()
 
-    if self._use_custom_sharding:
-      self._dataset.save(self._file_pattern, shard_func=_CustomShardFunc)
-    else:
-      self._dataset.save(self._file_pattern)
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        return self._converter.tensor_representations()
 
-  def ArrowSchema(self) -> pa.Schema:
-    return self._converter.arrow_schema()
+    def TensorAdapterConfig(self):
+        return tensor_adapter.TensorAdapterConfig(
+            self.ArrowSchema(), self.TensorRepresentations(), self._type_specs
+        )
 
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    return self._converter.tensor_representations()
+    def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
+        self._SaveDataset(batch_size)
+
+        def _PTransformFn(pipeline):
+            num_shards = self._options.num_shards or 1
+            return (
+                pipeline
+                | beam.Create(enumerate([self._file_pattern] * num_shards))
+                | beam.FlatMap(
+                    _LoadDatasetAsRecordBatch,
+                    self._converter,
+                    self._options.feature_names,
+                    self._use_custom_sharding,
+                )
+            )
 
-  def TensorAdapterConfig(self):
-    return tensor_adapter.TensorAdapterConfig(
-        self.ArrowSchema(), self.TensorRepresentations(), self._type_specs
-    )
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def RecordBatches(self, options):
+        return _PrepareDataset(
+            self._dataset, feature_names=options.feature_names
+        ).prefetch(tf.data.AUTOTUNE)
+
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        raise NotImplementedError
 
-  def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
-    self._SaveDataset(batch_size)
-
-    def _PTransformFn(pipeline):
-      num_shards = self._options.num_shards or 1
-      return (
-          pipeline
-          | beam.Create(enumerate([self._file_pattern] * num_shards))
-          | beam.FlatMap(
-              _LoadDatasetAsRecordBatch,
-              self._converter,
-              self._options.feature_names,
-              self._use_custom_sharding,
-          )
-      )
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def RecordBatches(self, options):
-    return _PrepareDataset(
-        self._dataset, feature_names=options.feature_names
-    ).prefetch(tf.data.AUTOTUNE)
-
-  def TensorFlowDataset(
-      self, options: dataset_options.TensorFlowDatasetOptions
-  ) -> tf.data.Dataset:
-    raise NotImplementedError
-
-  def _ProjectImpl(self, tensor_names):
-    raise NotImplementedError
+    def _ProjectImpl(self, tensor_names):
+        raise NotImplementedError
diff --git a/tfx_bsl/tfxio/dataset_tfxio_test.py b/tfx_bsl/tfxio/dataset_tfxio_test.py
index 1a22fbe9..1ed98aa4 100644
--- a/tfx_bsl/tfxio/dataset_tfxio_test.py
+++ b/tfx_bsl/tfxio/dataset_tfxio_test.py
@@ -13,98 +13,94 @@
 # limitations under the License.
 """Tests for tfx_bsl.tfxio.dataset_tfxio."""
 
-
 import collections
 import os
 import tempfile
-from absl.testing import parameterized
+
 import apache_beam as beam
-from apache_beam.testing import test_pipeline
-from apache_beam.testing import util
 import tensorflow as tf
+from absl.testing import parameterized
+from apache_beam.testing import test_pipeline, util
+
 from tfx_bsl.tfxio import dataset_tfxio
 
 # Tests the simple output for DatasetTFXIO.BeamSource().
 # No Batch Size Change, and uses default tf.data sharding (preserves order).
 BEAMSOURCE_EXAMPLES = [
     dict(
-        testcase_name='tensor_slices',
+        testcase_name="tensor_slices",
         dataset=tf.data.Dataset.from_tensor_slices([1, 2, 3]).batch(1),
         feature_names=[],
         batch_size=None,
         num_shards=None,
         expected_data=[
-            {'feature0': [[1]]},
-            {'feature0': [[2]]},
-            {'feature0': [[3]]},
+            {"feature0": [[1]]},
+            {"feature0": [[2]]},
+            {"feature0": [[3]]},
         ],
     ),
     dict(
-        testcase_name='tensor_slices_with_feature_names',
+        testcase_name="tensor_slices_with_feature_names",
         dataset=tf.data.Dataset.from_tensor_slices([1, 2, 3]).batch(1),
-        feature_names=['x'],
+        feature_names=["x"],
         batch_size=None,
         num_shards=None,
         expected_data=[
-            {'x': [[1]]},
-            {'x': [[2]]},
-            {'x': [[3]]},
+            {"x": [[1]]},
+            {"x": [[2]]},
+            {"x": [[3]]},
         ],
     ),
     dict(
-        testcase_name='dict',
+        testcase_name="dict",
         dataset=tf.data.Dataset.from_tensor_slices(
-            {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}
+            {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
         ).batch(2),
         feature_names=None,
         batch_size=None,
         num_shards=None,
         expected_data=[
-            {'a': [[1], [2]], 'b': [[5], [6]]},
-            {'a': [[3], [4]], 'b': [[7], [8]]},
+            {"a": [[1], [2]], "b": [[5], [6]]},
+            {"a": [[3], [4]], "b": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='dict_with_feature_names',
+        testcase_name="dict_with_feature_names",
         dataset=tf.data.Dataset.from_tensor_slices(
-            {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}
+            {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
         ).batch(2),
-        feature_names=['x', 'y'],
+        feature_names=["x", "y"],
         batch_size=None,
         num_shards=None,
         expected_data=[
-            {'x': [[1], [2]], 'y': [[5], [6]]},
-            {'x': [[3], [4]], 'y': [[7], [8]]},
+            {"x": [[1], [2]], "y": [[5], [6]]},
+            {"x": [[3], [4]], "y": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='namedtuple',
+        testcase_name="namedtuple",
         dataset=tf.data.Dataset.from_tensor_slices(
-            collections.namedtuple('Data', ['a', 'b'])(
-                [1, 2, 3, 4], [5, 6, 7, 8]
-            )
+            collections.namedtuple("Data", ["a", "b"])([1, 2, 3, 4], [5, 6, 7, 8])
         ).batch(2),
         feature_names=None,
         batch_size=None,
         num_shards=None,
         expected_data=[
-            {'a': [[1], [2]], 'b': [[5], [6]]},
-            {'a': [[3], [4]], 'b': [[7], [8]]},
+            {"a": [[1], [2]], "b": [[5], [6]]},
+            {"a": [[3], [4]], "b": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='namedtuple_with_feature_names',
+        testcase_name="namedtuple_with_feature_names",
         dataset=tf.data.Dataset.from_tensor_slices(
-            collections.namedtuple('Data', ['a', 'b'])(
-                [1, 2, 3, 4], [5, 6, 7, 8]
-            )
+            collections.namedtuple("Data", ["a", "b"])([1, 2, 3, 4], [5, 6, 7, 8])
         ).batch(2),
-        feature_names=['x', 'y'],
+        feature_names=["x", "y"],
         batch_size=None,
         num_shards=None,
         expected_data=[
-            {'x': [[1], [2]], 'y': [[5], [6]]},
-            {'x': [[3], [4]], 'y': [[7], [8]]},
+            {"x": [[1], [2]], "y": [[5], [6]]},
+            {"x": [[3], [4]], "y": [[7], [8]]},
         ],
     ),
 ]
@@ -113,39 +109,35 @@
 # Uses default tf.data sharding.
 BEAMSOURCE_BATCH_DEFAULT_SHARDS = [
     dict(
-        testcase_name='tensor_slices_batched_down',
-        dataset=tf.data.Dataset.from_tensor_slices(
-            [1, 2, 3, 4, 5, 6, 7, 8]
-        ).batch(4),
+        testcase_name="tensor_slices_batched_down",
+        dataset=tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8]).batch(4),
         feature_names=None,
         batch_size=2,
         num_shards=None,
         expected_data=[
-            {'feature0': [[1], [2]]},
-            {'feature0': [[3], [4]]},
-            {'feature0': [[5], [6]]},
-            {'feature0': [[7], [8]]},
+            {"feature0": [[1], [2]]},
+            {"feature0": [[3], [4]]},
+            {"feature0": [[5], [6]]},
+            {"feature0": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='tensor_slices_batched_up',
-        dataset=tf.data.Dataset.from_tensor_slices(
-            [1, 2, 3, 4, 5, 6, 7, 8]
-        ).batch(1),
+        testcase_name="tensor_slices_batched_up",
+        dataset=tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8]).batch(1),
         feature_names=None,
         batch_size=2,
         num_shards=None,
         expected_data=[
-            {'feature0': [[1], [2]]},
-            {'feature0': [[3], [4]]},
-            {'feature0': [[5], [6]]},
-            {'feature0': [[7], [8]]},
+            {"feature0": [[1], [2]]},
+            {"feature0": [[3], [4]]},
+            {"feature0": [[5], [6]]},
+            {"feature0": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='namedtuple_batched_down',
+        testcase_name="namedtuple_batched_down",
         dataset=tf.data.Dataset.from_tensor_slices(
-            collections.namedtuple('Data', ['a', 'b'])(
+            collections.namedtuple("Data", ["a", "b"])(
                 [1, 2, 3, 4, 5, 6, 7, 8], [11, 12, 13, 14, 15, 16, 17, 18]
             )
         ).batch(4),
@@ -153,52 +145,50 @@
         batch_size=2,
         num_shards=None,
         expected_data=[
-            {'a': [[1], [2]], 'b': [[11], [12]]},
-            {'a': [[3], [4]], 'b': [[13], [14]]},
-            {'a': [[5], [6]], 'b': [[15], [16]]},
-            {'a': [[7], [8]], 'b': [[17], [18]]},
+            {"a": [[1], [2]], "b": [[11], [12]]},
+            {"a": [[3], [4]], "b": [[13], [14]]},
+            {"a": [[5], [6]], "b": [[15], [16]]},
+            {"a": [[7], [8]], "b": [[17], [18]]},
         ],
     ),
     dict(
-        testcase_name='namedtuple_batched_up',
+        testcase_name="namedtuple_batched_up",
         dataset=tf.data.Dataset.from_tensor_slices(
-            collections.namedtuple('Data', ['a', 'b'])(
-                [1, 2, 3, 4], [5, 6, 7, 8]
-            )
+            collections.namedtuple("Data", ["a", "b"])([1, 2, 3, 4], [5, 6, 7, 8])
         ).batch(1),
-        feature_names=['x', 'y'],
+        feature_names=["x", "y"],
         batch_size=2,
         num_shards=None,
         expected_data=[
-            {'x': [[1], [2]], 'y': [[5], [6]]},
-            {'x': [[3], [4]], 'y': [[7], [8]]},
+            {"x": [[1], [2]], "y": [[5], [6]]},
+            {"x": [[3], [4]], "y": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='dict_batched_down',
+        testcase_name="dict_batched_down",
         dataset=tf.data.Dataset.from_tensor_slices(
-            {'a': [1, 2, 3, 4, 5, 6], 'b': [5, 6, 7, 8, 9, 10]}
+            {"a": [1, 2, 3, 4, 5, 6], "b": [5, 6, 7, 8, 9, 10]}
         ).batch(3),
-        feature_names=['x', 'y'],
+        feature_names=["x", "y"],
         batch_size=2,
         num_shards=None,
         expected_data=[
-            {'x': [[1], [2]], 'y': [[5], [6]]},
-            {'x': [[3], [4]], 'y': [[7], [8]]},
-            {'x': [[5], [6]], 'y': [[9], [10]]},
+            {"x": [[1], [2]], "y": [[5], [6]]},
+            {"x": [[3], [4]], "y": [[7], [8]]},
+            {"x": [[5], [6]], "y": [[9], [10]]},
         ],
     ),
     dict(
-        testcase_name='dict_batched_up',
+        testcase_name="dict_batched_up",
         dataset=tf.data.Dataset.from_tensor_slices(
-            {'a': [1, 2, 3, 4], 'b': [5, 6, 7, 8]}
+            {"a": [1, 2, 3, 4], "b": [5, 6, 7, 8]}
         ).batch(1),
         feature_names=None,
         batch_size=2,
         num_shards=None,
         expected_data=[
-            {'a': [[1], [2]], 'b': [[5], [6]]},
-            {'a': [[3], [4]], 'b': [[7], [8]]},
+            {"a": [[1], [2]], "b": [[5], [6]]},
+            {"a": [[3], [4]], "b": [[7], [8]]},
         ],
     ),
 ]
@@ -207,518 +197,523 @@
 # Uses custom no. of shards.
 BEAMSOURCE_BATCH_CUSTOM_SHARDS = [
     dict(
-        testcase_name='tensor_slices_batched_down_custom',
-        dataset=tf.data.Dataset.from_tensor_slices(
-            [1, 2, 3, 4, 5, 6, 7, 8]
-        ).batch(4),
+        testcase_name="tensor_slices_batched_down_custom",
+        dataset=tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8]).batch(4),
         feature_names=None,
         batch_size=2,
         num_shards=10,
         expected_data=[
-            {'feature0': [[1], [2]]},
-            {'feature0': [[3], [4]]},
-            {'feature0': [[5], [6]]},
-            {'feature0': [[7], [8]]},
+            {"feature0": [[1], [2]]},
+            {"feature0": [[3], [4]]},
+            {"feature0": [[5], [6]]},
+            {"feature0": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='tensor_slices_batched_up_custom',
-        dataset=tf.data.Dataset.from_tensor_slices(
-            [1, 2, 3, 4, 5, 6, 7, 8]
-        ).batch(1),
+        testcase_name="tensor_slices_batched_up_custom",
+        dataset=tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5, 6, 7, 8]).batch(1),
         feature_names=None,
         batch_size=4,
         num_shards=10,
         expected_data=[
-            {'feature0': [[1], [2], [3], [4]]},
-            {'feature0': [[5], [6], [7], [8]]},
+            {"feature0": [[1], [2], [3], [4]]},
+            {"feature0": [[5], [6], [7], [8]]},
         ],
     ),
     dict(
-        testcase_name='namedtuple_batched_down_custom',
+        testcase_name="namedtuple_batched_down_custom",
         dataset=tf.data.Dataset.from_tensor_slices(
-            collections.namedtuple('Data', ['a', 'b'])(
-                [1, 2, 3, 4], [5, 6, 7, 8]
-            )
+            collections.namedtuple("Data", ["a", "b"])([1, 2, 3, 4], [5, 6, 7, 8])
         ).batch(4),
         feature_names=None,
         batch_size=2,
         num_shards=10,
         expected_data=[
-            {'a': [[1], [2]], 'b': [[5], [6]]},
-            {'a': [[3], [4]], 'b': [[7], [8]]},
+            {"a": [[1], [2]], "b": [[5], [6]]},
+            {"a": [[3], [4]], "b": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='namedtuple_batched_up_custom',
+        testcase_name="namedtuple_batched_up_custom",
         dataset=tf.data.Dataset.from_tensor_slices(
-            collections.namedtuple('Data', ['a', 'b'])(
-                [1, 2, 3, 4], [5, 6, 7, 8]
-            )
+            collections.namedtuple("Data", ["a", "b"])([1, 2, 3, 4], [5, 6, 7, 8])
         ).batch(1),
-        feature_names=['x', 'y'],
+        feature_names=["x", "y"],
         batch_size=2,
         num_shards=10,
         expected_data=[
-            {'x': [[1], [2]], 'y': [[5], [6]]},
-            {'x': [[3], [4]], 'y': [[7], [8]]},
+            {"x": [[1], [2]], "y": [[5], [6]]},
+            {"x": [[3], [4]], "y": [[7], [8]]},
         ],
     ),
     dict(
-        testcase_name='dict_batched_down_custom',
+        testcase_name="dict_batched_down_custom",
         dataset=tf.data.Dataset.from_tensor_slices(
-            {'a': [1, 2, 3, 4, 5, 6], 'b': [5, 6, 7, 8, 9, 10]}
+            {"a": [1, 2, 3, 4, 5, 6], "b": [5, 6, 7, 8, 9, 10]}
         ).batch(3),
-        feature_names=['x', 'y'],
+        feature_names=["x", "y"],
         batch_size=2,
         num_shards=10,
         expected_data=[
-            {'x': [[1], [2]], 'y': [[5], [6]]},
-            {'x': [[3], [4]], 'y': [[7], [8]]},
-            {'x': [[5], [6]], 'y': [[9], [10]]},
+            {"x": [[1], [2]], "y": [[5], [6]]},
+            {"x": [[3], [4]], "y": [[7], [8]]},
+            {"x": [[5], [6]], "y": [[9], [10]]},
         ],
     ),
     dict(
-        testcase_name='dict_batched_up_custom',
+        testcase_name="dict_batched_up_custom",
         dataset=tf.data.Dataset.from_tensor_slices(
-            {'a': [1, 2, 3, 4, 5, 6], 'b': [5, 6, 7, 8, 9, 10]}
+            {"a": [1, 2, 3, 4, 5, 6], "b": [5, 6, 7, 8, 9, 10]}
         ).batch(2),
-        feature_names=['x', 'y'],
+        feature_names=["x", "y"],
         batch_size=3,
         num_shards=10,
         expected_data=[
-            {'x': [[1], [2], [3]], 'y': [[5], [6], [7]]},
-            {'x': [[4], [5], [6]], 'y': [[8], [9], [10]]},
+            {"x": [[1], [2], [3]], "y": [[5], [6], [7]]},
+            {"x": [[4], [5], [6]], "y": [[8], [9], [10]]},
         ],
     ),
 ]
 
 
 class DatasetTfxioTest(tf.test.TestCase, parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      *[
-          dict(
-              testcase_name='simple_element_spec',
-              element_spec=tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              feature_names=None,
-              expected_dict=collections.OrderedDict([(
-                  'feature0',
-                  tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              )]),
-          ),
-          dict(
-              testcase_name='simple_element_spec_feature_name',
-              element_spec=tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              feature_names=['custom_feature'],
-              expected_dict=collections.OrderedDict([(
-                  'custom_feature',
-                  tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              )]),
-          ),
-          dict(
-              testcase_name='element_spec_with_shape',
-              element_spec=tf.TensorSpec(shape=(5,), dtype=tf.int32, name=None),
-              feature_names=None,
-              expected_dict=collections.OrderedDict([(
-                  'feature0',
-                  tf.TensorSpec(shape=(5,), dtype=tf.int32, name=None),
-              )]),
-          ),
-          dict(
-              testcase_name='tuple_element_spec',
-              element_spec=(
-                  tf.TensorSpec(shape=(), dtype=tf.string, name=None),
-                  tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              ),
-              feature_names=None,
-              expected_dict=collections.OrderedDict([
-                  (
-                      'feature0',
-                      tf.TensorSpec(shape=(), dtype=tf.string, name=None),
-                  ),
-                  (
-                      'feature1',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-              ]),
-          ),
-          dict(
-              testcase_name='tuple_element_spec_feature_names',
-              element_spec=(
-                  tf.TensorSpec(shape=(None,), dtype=tf.string, name=None),
-                  tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              ),
-              feature_names=['custom_feature_1', 'custom_feature_2'],
-              expected_dict=collections.OrderedDict([
-                  (
-                      'custom_feature_1',
-                      tf.TensorSpec(shape=(None,), dtype=tf.string, name=None),
-                  ),
-                  (
-                      'custom_feature_2',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-              ]),
-          ),
-          dict(
-              testcase_name='namedtuple',
-              element_spec=collections.namedtuple('Dummy', ['x', 'y'])(
-                  tf.TensorSpec(shape=(2, 5), dtype=tf.string, name=None),
-                  tf.TensorSpec(shape=(None,), dtype=tf.int32, name=None),
-              ),
-              feature_names=None,
-              expected_dict=collections.OrderedDict([
-                  (
-                      'x',
-                      tf.TensorSpec(shape=(2, 5), dtype=tf.string, name=None),
-                  ),
-                  (
-                      'y',
-                      tf.TensorSpec(shape=(None,), dtype=tf.int32, name=None),
-                  ),
-              ]),
-          ),
-          dict(
-              testcase_name='namedtuple_feature_names',
-              element_spec=collections.namedtuple('Dummy', ['x', 'y'])(
-                  tf.TensorSpec(shape=(), dtype=tf.string, name=None),
-                  tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              ),
-              feature_names=['custom_feature_1', 'custom_feature_2'],
-              expected_dict=collections.OrderedDict([
-                  (
-                      'custom_feature_1',
-                      tf.TensorSpec(shape=(), dtype=tf.string, name=None),
-                  ),
-                  (
-                      'custom_feature_2',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-              ]),
-          ),
-          dict(
-              testcase_name='dict',
-              element_spec={
-                  'x': tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                  'y': tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              },
-              feature_names=None,
-              expected_dict=collections.OrderedDict([
-                  ('x', tf.TensorSpec(shape=(), dtype=tf.float32, name=None)),
-                  ('y', tf.TensorSpec(shape=(), dtype=tf.int32, name=None)),
-              ]),
-          ),
-          dict(
-              testcase_name='dict_feature_names',
-              element_spec={
-                  'x': tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                  'y': tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              },
-              feature_names=['custom_feature_1', 'custom_feature_2'],
-              expected_dict=collections.OrderedDict([
-                  (
-                      'custom_feature_1',
-                      tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                  ),
-                  (
-                      'custom_feature_2',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-              ]),
-          ),
-          dict(
-              testcase_name='simple_nested_dict',
-              element_spec={
-                  'x': tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                  'y': {
-                      'a': tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                      'b': tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  },
-              },
-              feature_names=None,
-              expected_dict=collections.OrderedDict([
-                  (
-                      'x',
-                      tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                  ),
-                  (
-                      'y_a',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-                  (
-                      'y_b',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-              ]),
-          ),
-          dict(
-              testcase_name='tuple_with_32nested_structure',
-              element_spec=(
-                  {
-                      'x': tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                      'y': {
-                          'a': tf.TensorSpec(
-                              shape=(), dtype=tf.int32, name=None
-                          ),
-                          'b': tf.TensorSpec(
-                              shape=(), dtype=tf.int32, name=None
-                          ),
-                      },
-                  },
-                  tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              ),
-              feature_names=None,
-              expected_dict=collections.OrderedDict([
-                  (
-                      'x',
-                      tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                  ),
-                  (
-                      'y_a',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-                  (
-                      'y_b',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-                  (
-                      'feature0',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-              ]),
-          ),
-          dict(
-              testcase_name='tuple_with_3_nested_structure',
-              element_spec=(
-                  {
-                      'x': tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                      'y': {
-                          'a': collections.namedtuple('Dummy', ['f1', 'f2'])(
-                              tf.TensorSpec(
-                                  shape=(), dtype=tf.string, name=None
-                              ),
-                              tf.TensorSpec(
-                                  shape=(), dtype=tf.int32, name=None
-                              ),
-                          ),
-                          'b': tf.TensorSpec(
-                              shape=(), dtype=tf.int32, name=None
-                          ),
-                      },
-                  },
-                  tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-              ),
-              feature_names=None,
-              expected_dict=collections.OrderedDict([
-                  (
-                      'x',
-                      tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
-                  ),
-                  (
-                      'y_a_f1',
-                      tf.TensorSpec(shape=(), dtype=tf.string, name=None),
-                  ),
-                  (
-                      'y_a_f2',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-                  (
-                      'y_b',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-                  (
-                      'feature0',
-                      tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
-                  ),
-              ]),
-          ),
-      ]
-  )
-  def test_dict_structure_from_element_specs(
-      self, element_spec, feature_names, expected_dict
-  ):
-    new_structure = dataset_tfxio._GetDictStructureForElementSpec(
-        element_spec, feature_names=feature_names
+    @parameterized.named_parameters(
+        *[
+            dict(
+                testcase_name="simple_element_spec",
+                element_spec=tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                feature_names=None,
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "feature0",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        )
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="simple_element_spec_feature_name",
+                element_spec=tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                feature_names=["custom_feature"],
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "custom_feature",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        )
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="element_spec_with_shape",
+                element_spec=tf.TensorSpec(shape=(5,), dtype=tf.int32, name=None),
+                feature_names=None,
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "feature0",
+                            tf.TensorSpec(shape=(5,), dtype=tf.int32, name=None),
+                        )
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="tuple_element_spec",
+                element_spec=(
+                    tf.TensorSpec(shape=(), dtype=tf.string, name=None),
+                    tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                ),
+                feature_names=None,
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "feature0",
+                            tf.TensorSpec(shape=(), dtype=tf.string, name=None),
+                        ),
+                        (
+                            "feature1",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="tuple_element_spec_feature_names",
+                element_spec=(
+                    tf.TensorSpec(shape=(None,), dtype=tf.string, name=None),
+                    tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                ),
+                feature_names=["custom_feature_1", "custom_feature_2"],
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "custom_feature_1",
+                            tf.TensorSpec(shape=(None,), dtype=tf.string, name=None),
+                        ),
+                        (
+                            "custom_feature_2",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="namedtuple",
+                element_spec=collections.namedtuple("Dummy", ["x", "y"])(
+                    tf.TensorSpec(shape=(2, 5), dtype=tf.string, name=None),
+                    tf.TensorSpec(shape=(None,), dtype=tf.int32, name=None),
+                ),
+                feature_names=None,
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "x",
+                            tf.TensorSpec(shape=(2, 5), dtype=tf.string, name=None),
+                        ),
+                        (
+                            "y",
+                            tf.TensorSpec(shape=(None,), dtype=tf.int32, name=None),
+                        ),
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="namedtuple_feature_names",
+                element_spec=collections.namedtuple("Dummy", ["x", "y"])(
+                    tf.TensorSpec(shape=(), dtype=tf.string, name=None),
+                    tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                ),
+                feature_names=["custom_feature_1", "custom_feature_2"],
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "custom_feature_1",
+                            tf.TensorSpec(shape=(), dtype=tf.string, name=None),
+                        ),
+                        (
+                            "custom_feature_2",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="dict",
+                element_spec={
+                    "x": tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                    "y": tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                },
+                feature_names=None,
+                expected_dict=collections.OrderedDict(
+                    [
+                        ("x", tf.TensorSpec(shape=(), dtype=tf.float32, name=None)),
+                        ("y", tf.TensorSpec(shape=(), dtype=tf.int32, name=None)),
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="dict_feature_names",
+                element_spec={
+                    "x": tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                    "y": tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                },
+                feature_names=["custom_feature_1", "custom_feature_2"],
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "custom_feature_1",
+                            tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                        ),
+                        (
+                            "custom_feature_2",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="simple_nested_dict",
+                element_spec={
+                    "x": tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                    "y": {
+                        "a": tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        "b": tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                    },
+                },
+                feature_names=None,
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "x",
+                            tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                        ),
+                        (
+                            "y_a",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                        (
+                            "y_b",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="tuple_with_32nested_structure",
+                element_spec=(
+                    {
+                        "x": tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                        "y": {
+                            "a": tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                            "b": tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        },
+                    },
+                    tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                ),
+                feature_names=None,
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "x",
+                            tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                        ),
+                        (
+                            "y_a",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                        (
+                            "y_b",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                        (
+                            "feature0",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                    ]
+                ),
+            ),
+            dict(
+                testcase_name="tuple_with_3_nested_structure",
+                element_spec=(
+                    {
+                        "x": tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                        "y": {
+                            "a": collections.namedtuple("Dummy", ["f1", "f2"])(
+                                tf.TensorSpec(shape=(), dtype=tf.string, name=None),
+                                tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                            ),
+                            "b": tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        },
+                    },
+                    tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                ),
+                feature_names=None,
+                expected_dict=collections.OrderedDict(
+                    [
+                        (
+                            "x",
+                            tf.TensorSpec(shape=(), dtype=tf.float32, name=None),
+                        ),
+                        (
+                            "y_a_f1",
+                            tf.TensorSpec(shape=(), dtype=tf.string, name=None),
+                        ),
+                        (
+                            "y_a_f2",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                        (
+                            "y_b",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                        (
+                            "feature0",
+                            tf.TensorSpec(shape=(), dtype=tf.int32, name=None),
+                        ),
+                    ]
+                ),
+            ),
+        ]
     )
-    self.assertEqual(new_structure, expected_dict)
+    def test_dict_structure_from_element_specs(
+        self, element_spec, feature_names, expected_dict
+    ):
+        new_structure = dataset_tfxio._GetDictStructureForElementSpec(
+            element_spec, feature_names=feature_names
+        )
+        self.assertEqual(new_structure, expected_dict)
 
-  @parameterized.named_parameters(
-      *[
-          dict(
-              testcase_name='dataset_tuple',
-              dataset=tf.data.Dataset.from_tensor_slices([1, 2, 3]),
-              feature_names=None,
-              expected_data=[
-                  collections.OrderedDict([('feature0', 1)]),
-                  collections.OrderedDict([('feature0', 2)]),
-                  collections.OrderedDict([('feature0', 3)]),
-              ],
-          ),
-          dict(
-              testcase_name='dataset_tuple_str',
-              dataset=tf.data.Dataset.from_tensor_slices(['foo', 'bar', 'baz']),
-              feature_names=None,
-              expected_data=[
-                  collections.OrderedDict([('feature0', b'foo')]),
-                  collections.OrderedDict([('feature0', b'bar')]),
-                  collections.OrderedDict([('feature0', b'baz')]),
-              ],
-          ),
-          dict(
-              testcase_name='dataset_tuple_with_feature_names',
-              dataset=tf.data.Dataset.from_tensor_slices(
-                  ([1, 2, 3], [4, 5, 6])
-              ),
-              feature_names=['x', 'y'],
-              expected_data=[
-                  collections.OrderedDict([('x', 1), ('y', 4)]),
-                  collections.OrderedDict([('x', 2), ('y', 5)]),
-                  collections.OrderedDict([('x', 3), ('y', 6)]),
-              ],
-          ),
-          dict(
-              testcase_name='dataset_dict',
-              dataset=tf.data.Dataset.from_tensor_slices(
-                  {'a': [1, 2], 'b': [3, 4]}
-              ),
-              feature_names=None,
-              expected_data=[
-                  collections.OrderedDict([('a', 1), ('b', 3)]),
-                  collections.OrderedDict([('a', 2), ('b', 4)]),
-              ],
-          ),
-          dict(
-              testcase_name='dataset_dict_with_feature_names',
-              dataset=tf.data.Dataset.from_tensor_slices(
-                  {'a': [1, 2], 'b': [3, 4]}
-              ),
-              feature_names=['f1', 'f2'],
-              expected_data=[
-                  collections.OrderedDict([('f1', 1), ('f2', 3)]),
-                  collections.OrderedDict([('f1', 2), ('f2', 4)]),
-              ],
-          ),
-          dict(
-              testcase_name='dataset_namedtuple',
-              dataset=tf.data.Dataset.from_tensor_slices(
-                  collections.namedtuple('Data', ['x', 'y'])([1], [2])
-              ),
-              feature_names=None,
-              expected_data=[collections.OrderedDict([('x', 1), ('y', 2)])],
-          ),
-          dict(
-              testcase_name='dataset_namedtuple_with_feature_names',
-              dataset=tf.data.Dataset.from_tensor_slices(
-                  collections.namedtuple('Data', ['x', 'y'])([1], [2])
-              ),
-              feature_names=['f1', 'f2'],
-              expected_data=[collections.OrderedDict([('f1', 1), ('f2', 2)])],
-          ),
-      ]
-  )
-  def test_prepare_dataset(self, dataset, feature_names, expected_data):
-    updated_dataset = dataset_tfxio._PrepareDataset(
-        dataset, feature_names=feature_names
+    @parameterized.named_parameters(
+        *[
+            dict(
+                testcase_name="dataset_tuple",
+                dataset=tf.data.Dataset.from_tensor_slices([1, 2, 3]),
+                feature_names=None,
+                expected_data=[
+                    collections.OrderedDict([("feature0", 1)]),
+                    collections.OrderedDict([("feature0", 2)]),
+                    collections.OrderedDict([("feature0", 3)]),
+                ],
+            ),
+            dict(
+                testcase_name="dataset_tuple_str",
+                dataset=tf.data.Dataset.from_tensor_slices(["foo", "bar", "baz"]),
+                feature_names=None,
+                expected_data=[
+                    collections.OrderedDict([("feature0", b"foo")]),
+                    collections.OrderedDict([("feature0", b"bar")]),
+                    collections.OrderedDict([("feature0", b"baz")]),
+                ],
+            ),
+            dict(
+                testcase_name="dataset_tuple_with_feature_names",
+                dataset=tf.data.Dataset.from_tensor_slices(([1, 2, 3], [4, 5, 6])),
+                feature_names=["x", "y"],
+                expected_data=[
+                    collections.OrderedDict([("x", 1), ("y", 4)]),
+                    collections.OrderedDict([("x", 2), ("y", 5)]),
+                    collections.OrderedDict([("x", 3), ("y", 6)]),
+                ],
+            ),
+            dict(
+                testcase_name="dataset_dict",
+                dataset=tf.data.Dataset.from_tensor_slices({"a": [1, 2], "b": [3, 4]}),
+                feature_names=None,
+                expected_data=[
+                    collections.OrderedDict([("a", 1), ("b", 3)]),
+                    collections.OrderedDict([("a", 2), ("b", 4)]),
+                ],
+            ),
+            dict(
+                testcase_name="dataset_dict_with_feature_names",
+                dataset=tf.data.Dataset.from_tensor_slices({"a": [1, 2], "b": [3, 4]}),
+                feature_names=["f1", "f2"],
+                expected_data=[
+                    collections.OrderedDict([("f1", 1), ("f2", 3)]),
+                    collections.OrderedDict([("f1", 2), ("f2", 4)]),
+                ],
+            ),
+            dict(
+                testcase_name="dataset_namedtuple",
+                dataset=tf.data.Dataset.from_tensor_slices(
+                    collections.namedtuple("Data", ["x", "y"])([1], [2])
+                ),
+                feature_names=None,
+                expected_data=[collections.OrderedDict([("x", 1), ("y", 2)])],
+            ),
+            dict(
+                testcase_name="dataset_namedtuple_with_feature_names",
+                dataset=tf.data.Dataset.from_tensor_slices(
+                    collections.namedtuple("Data", ["x", "y"])([1], [2])
+                ),
+                feature_names=["f1", "f2"],
+                expected_data=[collections.OrderedDict([("f1", 1), ("f2", 2)])],
+            ),
+        ]
     )
-    updated_data = list(updated_dataset.as_numpy_iterator())
-    self.assertAllEqual(updated_data, expected_data)
+    def test_prepare_dataset(self, dataset, feature_names, expected_data):
+        updated_dataset = dataset_tfxio._PrepareDataset(
+            dataset, feature_names=feature_names
+        )
+        updated_data = list(updated_dataset.as_numpy_iterator())
+        self.assertAllEqual(updated_data, expected_data)
 
-  def test_prepare_float_dataset(self):
-    dataset = tf.data.Dataset.from_tensor_slices([1.2, 3.4, 5.6])
-    expected_data = [1.2, 3.4, 5.6]
-    updated_dataset = dataset_tfxio._PrepareDataset(dataset, feature_names=None)
-    updated_data = [
-        list(i.values())[0] for i in list(updated_dataset.as_numpy_iterator())
-    ]
-    self.assertLen(updated_data, len(expected_data))
-    for x, y in zip(updated_data, expected_data):
-      self.assertAlmostEqual(x, y, places=5)
+    def test_prepare_float_dataset(self):
+        dataset = tf.data.Dataset.from_tensor_slices([1.2, 3.4, 5.6])
+        expected_data = [1.2, 3.4, 5.6]
+        updated_dataset = dataset_tfxio._PrepareDataset(dataset, feature_names=None)
+        updated_data = [
+            list(i.values())[0] for i in list(updated_dataset.as_numpy_iterator())
+        ]
+        self.assertLen(updated_data, len(expected_data))
+        for x, y in zip(updated_data, expected_data):
+            self.assertAlmostEqual(x, y, places=5)
 
-  def test_update_dataset_raises_error(self):
-    value = tf.constant(1 + 2j)
-    dataset = tf.data.Dataset.from_tensor_slices([value])
-    with self.assertRaisesRegex(
-        TypeError,
-        "Got <dtype: 'complex128'>. Only tf.uint8/16/32, tf.int8/16/32/64,"
-        ' tf.float16/32 and bytes/tf.string supported.',
-    ):
-      dataset_tfxio._PrepareDataset(dataset, feature_names=None)
+    def test_update_dataset_raises_error(self):
+        value = tf.constant(1 + 2j)
+        dataset = tf.data.Dataset.from_tensor_slices([value])
+        with self.assertRaisesRegex(
+            TypeError,
+            "Got <dtype: 'complex128'>. Only tf.uint8/16/32, tf.int8/16/32/64,"
+            " tf.float16/32 and bytes/tf.string supported.",
+        ):
+            dataset_tfxio._PrepareDataset(dataset, feature_names=None)
 
-  @parameterized.named_parameters(
-      *(
-          BEAMSOURCE_EXAMPLES
-          + BEAMSOURCE_BATCH_DEFAULT_SHARDS
-          + BEAMSOURCE_BATCH_CUSTOM_SHARDS
-      )
-  )
-  def test_dataset_tfxio_beam_source(
-      self,
-      dataset,
-      feature_names,
-      num_shards,
-      batch_size,
-      expected_data,
-  ):
-    options = dataset_tfxio.DatasetTFXIOOptions(
-        feature_names=feature_names,
-        num_shards=num_shards,
+    @parameterized.named_parameters(
+        *(
+            BEAMSOURCE_EXAMPLES
+            + BEAMSOURCE_BATCH_DEFAULT_SHARDS
+            + BEAMSOURCE_BATCH_CUSTOM_SHARDS
+        )
     )
-    ds_tfxio = dataset_tfxio.DatasetTFXIO(dataset, options=options)
+    def test_dataset_tfxio_beam_source(
+        self,
+        dataset,
+        feature_names,
+        num_shards,
+        batch_size,
+        expected_data,
+    ):
+        options = dataset_tfxio.DatasetTFXIOOptions(
+            feature_names=feature_names,
+            num_shards=num_shards,
+        )
+        ds_tfxio = dataset_tfxio.DatasetTFXIO(dataset, options=options)
 
-    with test_pipeline.TestPipeline() as p:
-      data = (
-          p
-          | ds_tfxio.BeamSource(batch_size=batch_size)
-          | beam.Map(lambda x: x.to_pydict())
-      )
+        with test_pipeline.TestPipeline() as p:
+            data = (
+                p
+                | ds_tfxio.BeamSource(batch_size=batch_size)
+                | beam.Map(lambda x: x.to_pydict())
+            )
 
-      util.assert_that(
-          data,
-          util.equal_to(expected_data),
-      )
+            util.assert_that(
+                data,
+                util.equal_to(expected_data),
+            )
 
-  @parameterized.named_parameters(*BEAMSOURCE_EXAMPLES)
-  def test_dataset_does_not_mutate(self, dataset, feature_names, **kwargs):
-    del kwargs
-    options = dataset_tfxio.DatasetTFXIOOptions(
-        feature_names=feature_names,
-    )
-    ds_tfxio = dataset_tfxio.DatasetTFXIO(dataset, options=options)
-    # Invoking BeamSource to execute Dataset preparation and iteration.
-    with test_pipeline.TestPipeline() as p:
-      _ = p | ds_tfxio.BeamSource()
+    @parameterized.named_parameters(*BEAMSOURCE_EXAMPLES)
+    def test_dataset_does_not_mutate(self, dataset, feature_names, **kwargs):
+        del kwargs
+        options = dataset_tfxio.DatasetTFXIOOptions(
+            feature_names=feature_names,
+        )
+        ds_tfxio = dataset_tfxio.DatasetTFXIO(dataset, options=options)
+        # Invoking BeamSource to execute Dataset preparation and iteration.
+        with test_pipeline.TestPipeline() as p:
+            _ = p | ds_tfxio.BeamSource()
 
-    dataset_1 = list(dataset.as_numpy_iterator())
-    dataset_2 = list(ds_tfxio._dataset.as_numpy_iterator())
+        dataset_1 = list(dataset.as_numpy_iterator())
+        dataset_2 = list(ds_tfxio._dataset.as_numpy_iterator())
 
-    self.assertLen(dataset_1, len(dataset_2))
-    for x, y in zip(dataset_1, dataset_2):
-      if isinstance(x, dict):
-        self.assertDictEqual(x, y, 'Dataset Differs')
-      else:
-        self.assertAllEqual(x, y, 'Dataset Differs')
+        self.assertLen(dataset_1, len(dataset_2))
+        for x, y in zip(dataset_1, dataset_2):
+            if isinstance(x, dict):
+                self.assertDictEqual(x, y, "Dataset Differs")
+            else:
+                self.assertAllEqual(x, y, "Dataset Differs")
 
-  @parameterized.named_parameters(
-      *[
-          dict(testcase_name='num_shards_1', num_shards=1, expected_count=1),
-          dict(testcase_name='num_shards_5', num_shards=5, expected_count=5),
-          dict(testcase_name='num_shards_10', num_shards=10, expected_count=10),
-      ]
-  )
-  def test_dataset_tfxio_num_shards(self, num_shards, expected_count):
-    dataset = tf.data.Dataset.from_tensor_slices(range(100)).batch(1)
-    working_dir = tempfile.mkdtemp()
-    options = dataset_tfxio.DatasetTFXIOOptions(
-        working_dir=working_dir, num_shards=num_shards
+    @parameterized.named_parameters(
+        *[
+            dict(testcase_name="num_shards_1", num_shards=1, expected_count=1),
+            dict(testcase_name="num_shards_5", num_shards=5, expected_count=5),
+            dict(testcase_name="num_shards_10", num_shards=10, expected_count=10),
+        ]
     )
-    ds_tfxio = dataset_tfxio.DatasetTFXIO(dataset, options=options)
-    with test_pipeline.TestPipeline() as p:
-      _ = p | ds_tfxio.BeamSource()
+    def test_dataset_tfxio_num_shards(self, num_shards, expected_count):
+        dataset = tf.data.Dataset.from_tensor_slices(range(100)).batch(1)
+        working_dir = tempfile.mkdtemp()
+        options = dataset_tfxio.DatasetTFXIOOptions(
+            working_dir=working_dir, num_shards=num_shards
+        )
+        ds_tfxio = dataset_tfxio.DatasetTFXIO(dataset, options=options)
+        with test_pipeline.TestPipeline() as p:
+            _ = p | ds_tfxio.BeamSource()
 
-    shards = ['shard' in dirname for dirname, _, _ in os.walk(working_dir)]
-    self.assertEqual(sum(shards), expected_count)
+        shards = ["shard" in dirname for dirname, _, _ in os.walk(working_dir)]
+        self.assertEqual(sum(shards), expected_count)
 
 
-if __name__ == '__main__':
-  tf.test.main()
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tfx_bsl/tfxio/dataset_util.py b/tfx_bsl/tfxio/dataset_util.py
index 8c542637..d225ff1f 100644
--- a/tfx_bsl/tfxio/dataset_util.py
+++ b/tfx_bsl/tfxio/dataset_util.py
@@ -19,49 +19,51 @@
 
 
 def detect_compression_type(file_patterns: tf.Tensor) -> tf.Tensor:
-  """A TF function that detects compression type given file patterns.
-
-  It simply looks at the names (extensions) of files matching the patterns.
-
-  Args:
-    file_patterns: A 1-D string tensor that contains the file patterns.
-
-  Returns:
-    A scalar string tensor. The contents are either "GZIP", "" or
-    "INVALID_MIXED_COMPRESSION_TYPES".
-  """
-  # Implementation notes:
-  # Because the result of this function usually feeds to another
-  # function that creates a TF dataset, and the whole dataset creating logic
-  # is usually warpped in an input_fn in the trainer, this function must
-  # be a pure composition of TF ops. To be compatible with
-  # tf.compat.v1.make_oneshot_dataset, this function cannot be a @tf.function,
-  # and it cannot contain any conditional / stateful op either.
-  # Once we decide to stop supporting TF 1.x and tf.compat.v1, we can rewrite
-  # this as a @tf.function, and use tf.cond / tf.case to make the logic more
-  # readable.
-
-  files = tf.io.matching_files(file_patterns)
-  is_gz = tf.strings.regex_full_match(files, r".*\.gz$")
-  all_files_are_not_gz = tf.math.reduce_all(~is_gz)
-  all_files_are_gz = tf.math.reduce_all(is_gz)
-  # Encode the 4 cases as integers 0b00 - 0b11 where
-  # `all_files_are_not_gz` is bit 0
-  # `all_files_are_gz` is bit 1
-  # 00: invalid, some files are gz some files are not
-  # 01: all are not gz
-  # 10: all are gz
-  # 11: the only possibility is `files` is empty, can be arbitrary.
-  formats = tf.constant(["INVALID_MIXED_COMPRESSION_TYPES", "", "GZIP", ""])
-  index = (
-      tf.bitwise.left_shift(tf.cast(all_files_are_gz, tf.int32), 1) +
-      tf.cast(all_files_are_not_gz, tf.int32))
-
-  return formats[index]
+    """A TF function that detects compression type given file patterns.
+
+    It simply looks at the names (extensions) of files matching the patterns.
+
+    Args:
+    ----
+      file_patterns: A 1-D string tensor that contains the file patterns.
+
+    Returns:
+    -------
+      A scalar string tensor. The contents are either "GZIP", "" or
+      "INVALID_MIXED_COMPRESSION_TYPES".
+    """
+    # Implementation notes:
+    # Because the result of this function usually feeds to another
+    # function that creates a TF dataset, and the whole dataset creating logic
+    # is usually warpped in an input_fn in the trainer, this function must
+    # be a pure composition of TF ops. To be compatible with
+    # tf.compat.v1.make_oneshot_dataset, this function cannot be a @tf.function,
+    # and it cannot contain any conditional / stateful op either.
+    # Once we decide to stop supporting TF 1.x and tf.compat.v1, we can rewrite
+    # this as a @tf.function, and use tf.cond / tf.case to make the logic more
+    # readable.
+
+    files = tf.io.matching_files(file_patterns)
+    is_gz = tf.strings.regex_full_match(files, r".*\.gz$")
+    all_files_are_not_gz = tf.math.reduce_all(~is_gz)
+    all_files_are_gz = tf.math.reduce_all(is_gz)
+    # Encode the 4 cases as integers 0b00 - 0b11 where
+    # `all_files_are_not_gz` is bit 0
+    # `all_files_are_gz` is bit 1
+    # 00: invalid, some files are gz some files are not
+    # 01: all are not gz
+    # 10: all are gz
+    # 11: the only possibility is `files` is empty, can be arbitrary.
+    formats = tf.constant(["INVALID_MIXED_COMPRESSION_TYPES", "", "GZIP", ""])
+    index = tf.bitwise.left_shift(tf.cast(all_files_are_gz, tf.int32), 1) + tf.cast(
+        all_files_are_not_gz, tf.int32
+    )
+
+    return formats[index]
 
 
 def make_tf_record_dataset(
-    file_pattern: List[Text],
+    file_pattern: List[str],
     batch_size: int,
     drop_final_batch: bool,
     num_epochs: Optional[int],
@@ -71,54 +73,57 @@ def make_tf_record_dataset(
     reader_num_threads: int = tf.data.experimental.AUTOTUNE,
     sloppy_ordering: bool = False,
 ) -> tf.data.Dataset:
-  """Returns an interleaved TFRecordDataset with basic options.
-
-  This implementation is a simplified version of
-  tf.data.experimental.ops.make_tf_record_dataset().
-
-  Args:
-    file_pattern: One or a list of glob patterns. If a list, must not be empty.
-    batch_size: An int representing the number of records to combine in a single
-      batch.
-    drop_final_batch: If `True`, and the batch size does not evenly divide the
-      input dataset size, the final smaller batch will be dropped. Defaults to
-      `False`.
-    num_epochs: Integer specifying the number of times to read through the
-      dataset. If None, cycles through the dataset forever. Defaults to `None`.
-    shuffle: A boolean, indicates whether the input should be shuffled. Defaults
-      to `True`.
-    shuffle_buffer_size: Buffer size of the items to shuffle. The size is the
-      number of items (i.e. records for a record based TFXIO) to hold. Only data
-      read into the buffer will be shuffled (there is no shuffling across
-      buffers). A large capacity ensures better shuffling but would increase
-      memory usage and startup time.
-    shuffle_seed: Randomization seed to use for shuffling.
-    reader_num_threads: Number of threads used to read records. If >1, the
-      results will be interleaved. Defaults to tf.data.experimental.AUTOTUNE.
-    sloppy_ordering: If `True`, reading performance will be improved at the
-      cost of non-deterministic ordering. If `False`, the order of elements
-      produced is deterministic prior to shuffling (elements are still
-      randomized if `shuffle=True`. Note that if the seed is set, then order
-      of elements after shuffling is deterministic). Defaults to False.
-  """
-  file_pattern = tf.convert_to_tensor(file_pattern)
-
-  dataset = tf.data.Dataset.list_files(
-      file_pattern, shuffle=shuffle, seed=shuffle_seed)
-
-  compression_type = detect_compression_type(file_pattern)
-  dataset = dataset.interleave(
-      lambda filename: tf.data.TFRecordDataset(filename, compression_type),
-      num_parallel_calls=reader_num_threads)
-  options = tf.data.Options()
-  options.experimental_deterministic = not sloppy_ordering
-  dataset = dataset.with_options(options)
-
-  if shuffle:
-    dataset = dataset.shuffle(shuffle_buffer_size, shuffle_seed)
-  if num_epochs != 1:
-    dataset = dataset.repeat(num_epochs)
-
-  drop_final_batch = drop_final_batch or num_epochs is None
-
-  return dataset.batch(batch_size, drop_remainder=drop_final_batch)
+    """Returns an interleaved TFRecordDataset with basic options.
+
+    This implementation is a simplified version of
+    tf.data.experimental.ops.make_tf_record_dataset().
+
+    Args:
+    ----
+      file_pattern: One or a list of glob patterns. If a list, must not be empty.
+      batch_size: An int representing the number of records to combine in a single
+        batch.
+      drop_final_batch: If `True`, and the batch size does not evenly divide the
+        input dataset size, the final smaller batch will be dropped. Defaults to
+        `False`.
+      num_epochs: Integer specifying the number of times to read through the
+        dataset. If None, cycles through the dataset forever. Defaults to `None`.
+      shuffle: A boolean, indicates whether the input should be shuffled. Defaults
+        to `True`.
+      shuffle_buffer_size: Buffer size of the items to shuffle. The size is the
+        number of items (i.e. records for a record based TFXIO) to hold. Only data
+        read into the buffer will be shuffled (there is no shuffling across
+        buffers). A large capacity ensures better shuffling but would increase
+        memory usage and startup time.
+      shuffle_seed: Randomization seed to use for shuffling.
+      reader_num_threads: Number of threads used to read records. If >1, the
+        results will be interleaved. Defaults to tf.data.experimental.AUTOTUNE.
+      sloppy_ordering: If `True`, reading performance will be improved at the
+        cost of non-deterministic ordering. If `False`, the order of elements
+        produced is deterministic prior to shuffling (elements are still
+        randomized if `shuffle=True`. Note that if the seed is set, then order
+        of elements after shuffling is deterministic). Defaults to False.
+    """
+    file_pattern = tf.convert_to_tensor(file_pattern)
+
+    dataset = tf.data.Dataset.list_files(
+        file_pattern, shuffle=shuffle, seed=shuffle_seed
+    )
+
+    compression_type = detect_compression_type(file_pattern)
+    dataset = dataset.interleave(
+        lambda filename: tf.data.TFRecordDataset(filename, compression_type),
+        num_parallel_calls=reader_num_threads,
+    )
+    options = tf.data.Options()
+    options.experimental_deterministic = not sloppy_ordering
+    dataset = dataset.with_options(options)
+
+    if shuffle:
+        dataset = dataset.shuffle(shuffle_buffer_size, shuffle_seed)
+    if num_epochs != 1:
+        dataset = dataset.repeat(num_epochs)
+
+    drop_final_batch = drop_final_batch or num_epochs is None
+
+    return dataset.batch(batch_size, drop_remainder=drop_final_batch)
diff --git a/tfx_bsl/tfxio/dataset_util_test.py b/tfx_bsl/tfxio/dataset_util_test.py
index b19aa8c1..4d404896 100644
--- a/tfx_bsl/tfxio/dataset_util_test.py
+++ b/tfx_bsl/tfxio/dataset_util_test.py
@@ -14,124 +14,141 @@
 """Tests for tfx_bsl.tfxio.dataset_util."""
 
 import os
-import pytest
 import tempfile
 
+import pytest
+import tensorflow as tf
 from absl import flags
 from absl.testing import parameterized
-import tensorflow as tf
+from tensorflow.python.framework import (
+    test_util,  # pylint: disable=g-direct-tensorflow-import
+)
+
 from tfx_bsl.tfxio import dataset_util
-from tensorflow.python.framework import test_util  # pylint: disable=g-direct-tensorflow-import
 
 FLAGS = flags.FLAGS
 
-_RECORDS = [b'aaa', b'bbb']
+_RECORDS = [b"aaa", b"bbb"]
 
 
 def _write_inputs(filename):
-  with tf.io.TFRecordWriter(filename) as w:
-    for s in _RECORDS:
-      w.write(s)
+    with tf.io.TFRecordWriter(filename) as w:
+        for s in _RECORDS:
+            w.write(s)
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class DatasetUtilTest(tf.test.TestCase, parameterized.TestCase):
-
-  def setUp(self):
-    super(DatasetUtilTest, self).setUp()
-    self._example_file = os.path.join(FLAGS.test_tmpdir, 'datasetutiltest',
-                                      'input')
-    tf.io.gfile.makedirs(os.path.dirname(self._example_file))
-    _write_inputs(self._example_file)
-
-  @parameterized.named_parameters(*[
-      dict(
-          testcase_name='default_options',
-          batch_size=1,
-          drop_final_batch=False,
-          num_epochs=1,
-          expected_data=[[b'aaa'], [b'bbb']]),
-      dict(
-          testcase_name='batch',
-          batch_size=2,
-          drop_final_batch=False,
-          num_epochs=1,
-          expected_data=[[b'aaa', b'bbb']]),
-      dict(
-          testcase_name='batch_2_epochs',
-          batch_size=2,
-          drop_final_batch=False,
-          num_epochs=2,
-          expected_data=[[b'aaa', b'bbb'], [b'aaa', b'bbb']]),
-      dict(
-          testcase_name='drop_final_batch',
-          batch_size=3,
-          drop_final_batch=True,
-          num_epochs=4,
-          expected_data=[[b'aaa', b'bbb', b'aaa'], [b'bbb', b'aaa', b'bbb']])
-  ])
-  @test_util.run_in_graph_and_eager_modes
-  def test_make_tf_record_dataset(self, batch_size, drop_final_batch,
-                                  num_epochs, expected_data):
-    dataset = dataset_util.make_tf_record_dataset(
-        self._example_file,
-        batch_size,
-        drop_final_batch,
-        num_epochs,
-        shuffle=False,
-        shuffle_buffer_size=10000,
-        shuffle_seed=None)
-    data = _dataset_elements(dataset)
-    self.assertAllEqual(data, expected_data)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test_detect_compression_type(self):
-    tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
-
-    def _touch_file(path):
-      with open(path, 'w+') as _:
-        pass
-
-    _touch_file(os.path.join(tmp_dir, 'dataset-a-0.gz'))
-    _touch_file(os.path.join(tmp_dir, 'dataset-a-1.gz'))
-    _touch_file(os.path.join(tmp_dir, 'dataset-b-0'))
-    _touch_file(os.path.join(tmp_dir, 'dataset-b-1'))
-
-    self.assertAllEqual(
-        dataset_util.detect_compression_type(
-            [os.path.join(tmp_dir, 'dataset-a*')]), b'GZIP')
-
-    self.assertAllEqual(
-        dataset_util.detect_compression_type(
-            [os.path.join(tmp_dir, 'dataset-b*')]), b'')
-
-    self.assertAllEqual(
-        dataset_util.detect_compression_type([
-            os.path.join(tmp_dir, 'dataset-b*'),
-            os.path.join(tmp_dir, 'dataset-a*')
-        ]), b'INVALID_MIXED_COMPRESSION_TYPES')
-
-    self.assertAllEqual(
-        dataset_util.detect_compression_type(
-            [os.path.join(tmp_dir, 'invalid*')]), b'')
+    def setUp(self):
+        super(DatasetUtilTest, self).setUp()
+        self._example_file = os.path.join(FLAGS.test_tmpdir, "datasetutiltest", "input")
+        tf.io.gfile.makedirs(os.path.dirname(self._example_file))
+        _write_inputs(self._example_file)
+
+    @parameterized.named_parameters(
+        *[
+            dict(
+                testcase_name="default_options",
+                batch_size=1,
+                drop_final_batch=False,
+                num_epochs=1,
+                expected_data=[[b"aaa"], [b"bbb"]],
+            ),
+            dict(
+                testcase_name="batch",
+                batch_size=2,
+                drop_final_batch=False,
+                num_epochs=1,
+                expected_data=[[b"aaa", b"bbb"]],
+            ),
+            dict(
+                testcase_name="batch_2_epochs",
+                batch_size=2,
+                drop_final_batch=False,
+                num_epochs=2,
+                expected_data=[[b"aaa", b"bbb"], [b"aaa", b"bbb"]],
+            ),
+            dict(
+                testcase_name="drop_final_batch",
+                batch_size=3,
+                drop_final_batch=True,
+                num_epochs=4,
+                expected_data=[[b"aaa", b"bbb", b"aaa"], [b"bbb", b"aaa", b"bbb"]],
+            ),
+        ]
+    )
+    @test_util.run_in_graph_and_eager_modes
+    def test_make_tf_record_dataset(
+        self, batch_size, drop_final_batch, num_epochs, expected_data
+    ):
+        dataset = dataset_util.make_tf_record_dataset(
+            self._example_file,
+            batch_size,
+            drop_final_batch,
+            num_epochs,
+            shuffle=False,
+            shuffle_buffer_size=10000,
+            shuffle_seed=None,
+        )
+        data = _dataset_elements(dataset)
+        self.assertAllEqual(data, expected_data)
+
+    @test_util.run_in_graph_and_eager_modes
+    def test_detect_compression_type(self):
+        tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
+
+        def _touch_file(path):
+            with open(path, "w+") as _:
+                pass
+
+        _touch_file(os.path.join(tmp_dir, "dataset-a-0.gz"))
+        _touch_file(os.path.join(tmp_dir, "dataset-a-1.gz"))
+        _touch_file(os.path.join(tmp_dir, "dataset-b-0"))
+        _touch_file(os.path.join(tmp_dir, "dataset-b-1"))
+
+        self.assertAllEqual(
+            dataset_util.detect_compression_type([os.path.join(tmp_dir, "dataset-a*")]),
+            b"GZIP",
+        )
+
+        self.assertAllEqual(
+            dataset_util.detect_compression_type([os.path.join(tmp_dir, "dataset-b*")]),
+            b"",
+        )
+
+        self.assertAllEqual(
+            dataset_util.detect_compression_type(
+                [
+                    os.path.join(tmp_dir, "dataset-b*"),
+                    os.path.join(tmp_dir, "dataset-a*"),
+                ]
+            ),
+            b"INVALID_MIXED_COMPRESSION_TYPES",
+        )
+
+        self.assertAllEqual(
+            dataset_util.detect_compression_type([os.path.join(tmp_dir, "invalid*")]),
+            b"",
+        )
 
 
 def _dataset_elements(dataset):
-  """Returns elements from the `tf.data.Dataset` object as a list."""
-  results = []
-  if tf.executing_eagerly():
-    for elem in dataset:
-      results.append(elem.numpy())
-  else:
-    iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
-    next_elem = iterator.get_next()
-    with tf.compat.v1.Session() as sess:
-      while True:
-        try:
-          results.append(sess.run(next_elem))
-        except tf.errors.OutOfRangeError:
-          break
-  return results
-
-if __name__ == '__main__':
-  tf.test.main()
+    """Returns elements from the `tf.data.Dataset` object as a list."""
+    results = []
+    if tf.executing_eagerly():
+        for elem in dataset:
+            results.append(elem.numpy())
+    else:
+        iterator = tf.compat.v1.data.make_one_shot_iterator(dataset)
+        next_elem = iterator.get_next()
+        with tf.compat.v1.Session() as sess:
+            while True:
+                try:
+                    results.append(sess.run(next_elem))
+                except tf.errors.OutOfRangeError:
+                    break
+    return results
+
+
+if __name__ == "__main__":
+    tf.test.main()
diff --git a/tfx_bsl/tfxio/parquet_tfxio.py b/tfx_bsl/tfxio/parquet_tfxio.py
index a605b98b..2ce3f3f1 100644
--- a/tfx_bsl/tfxio/parquet_tfxio.py
+++ b/tfx_bsl/tfxio/parquet_tfxio.py
@@ -18,197 +18,216 @@
 from typing import Any, Dict, List, Optional, Union
 
 import apache_beam as beam
-from apache_beam.io.filesystems import FileSystems
 import pyarrow as pa
 import pyarrow.parquet as pq
 import tensorflow as tf
-from tfx_bsl.coders import batch_util
-from tfx_bsl.coders import csv_decoder
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import record_based_tfxio
-from tfx_bsl.tfxio import telemetry
-from tfx_bsl.tfxio import tensor_adapter
-from tfx_bsl.tfxio import tensor_representation_util
-from tfx_bsl.tfxio import tfxio
-
+from apache_beam.io.filesystems import FileSystems
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.coders import batch_util, csv_decoder
+from tfx_bsl.tfxio import (
+    dataset_options,
+    record_based_tfxio,
+    telemetry,
+    tensor_adapter,
+    tensor_representation_util,
+    tfxio,
+)
+
 _PARQUET_FORMAT = "parquet"
 
 _RecordDataType = Union[str, int, float, Dict[Any, Any], List[Any]]
 
 
-def _RecordDictsToRecordBatch(dicts: List[Dict[str, _RecordDataType]],
-                              schema: pa.Schema) -> pa.RecordBatch:
-  record_data_arrays = []
-  for name, array_type in zip(schema.names, schema.types):
-    record_data_arrays.append(
-        pa.array([record[name] for record in dicts], array_type))
-  return pa.RecordBatch.from_arrays(record_data_arrays, schema)
+def _RecordDictsToRecordBatch(
+    dicts: List[Dict[str, _RecordDataType]], schema: pa.Schema
+) -> pa.RecordBatch:
+    record_data_arrays = []
+    for name, array_type in zip(schema.names, schema.types):
+        record_data_arrays.append(
+            pa.array([record[name] for record in dicts], array_type)
+        )
+    return pa.RecordBatch.from_arrays(record_data_arrays, schema)
 
 
 class ParquetTFXIO(record_based_tfxio.RecordBasedTFXIO):
-  """TFXIO implementation for Parquet."""
-
-  def __init__(self,
-               file_pattern: Union[str, List[str]],
-               *,
-               column_names: Optional[List[str]] = None,
-               min_bundle_size: int = 0,
-               schema: Optional[schema_pb2.Schema] = None,
-               validate: bool = True,
-               telemetry_descriptors: Optional[List[str]] = None):
-    """Initializes a Parquet TFXIO.
-
-    Args:
-      file_pattern: One or a list of file glob patterns to read parquet files
-        from.
-      column_names: List of column names to read from the parquet files.
-      min_bundle_size: the minimum size in bytes, to be considered when
-        splitting the parquet input into bundles. If not provided, all columns
-        in the dataset will be read.
-      schema: An optional TFMD Schema describing the dataset. If schema is
-        provided, it will determine the data type of the parquet columns.
-        Otherwise, the each column's data type will be inferred by the decoder.
-      validate: Boolean flag to verify that the files exist during the pipeline
-        creation time.
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-    """
-    super().__init__(
-        telemetry_descriptors=telemetry_descriptors,
-        raw_record_column_name=None,
-        logical_format=_PARQUET_FORMAT,
-        physical_format=_PARQUET_FORMAT)
-    self._file_pattern = (
-        file_pattern if isinstance(file_pattern, list) else [file_pattern])
-    self._column_names = column_names
-    self._min_bundle_size = min_bundle_size
-    self._validate = validate
-    self._schema = schema
-
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-
-    @beam.typehints.with_output_types(bytes)
-    def _PTransformFn(
-        pcoll_or_pipeline: Union[beam.PCollection, beam.Pipeline]
-    ) -> beam.PCollection[bytes]:
-      """Reads Parquet records and serializes to bytes."""
-      source_pcolls = []
-      for i, pattern in enumerate(self._file_pattern):
-        source_pcolls.append(
-            pcoll_or_pipeline
-            | f"ReadFromParquet[{i}]" >> beam.io.ReadFromParquet(
-                file_pattern=pattern,
-                min_bundle_size=self._min_bundle_size,
-                validate=self._validate,
-                columns=self._column_names))
-      return (source_pcolls | "FlattenPCollsFromPatterns" >> beam.Flatten()
-              | "EncodeRawRecords" >> beam.Map(pickle.dumps))
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def _RawRecordToRecordBatchInternal(self,
-                                      batch_size: Optional[int] = None
-                                     ) -> beam.PTransform:
-
-    @beam.typehints.with_input_types(bytes)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _PTransformFn(
-        raw_records_pcoll: beam.PCollection[bytes]
-    ) -> beam.PCollection[pa.RecordBatch]:
-      """Decodes raw records and converts them to RecordBatches."""
-      return (
-          raw_records_pcoll
-          | "DecodeRawRecords" >> beam.Map(pickle.loads)
-          | "Batch"
-          >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
-          | "ToRecordBatch"
-          >> beam.Map(_RecordDictsToRecordBatch, schema=self.ArrowSchema())
-      )
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
-    # We override the `RecordBasedTFXIO`s `BeamSource` that is a composition of
-    # `_RawRecordBeamSourceInternal` and `_RawRecordToRecordBatchInternal` with
-    # a more efficient implementation that uses batched read.
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _PTransformFn(
-        pcoll_or_pipeline: Union[beam.PCollection, beam.Pipeline]
-    ) -> beam.PCollection[pa.RecordBatch]:
-      """Reads Parquet tables and converts to RecordBatches."""
-      source_pcolls = []
-      for i, pattern in enumerate(self._file_pattern):
-        source_pcolls.append(
-            pcoll_or_pipeline
-            | f"ReadFromParquetBatched[{i}]" >> beam.io.ReadFromParquetBatched(
-                file_pattern=pattern,
-                min_bundle_size=self._min_bundle_size,
-                validate=self._validate,
-                columns=self._column_names))
-      return (
-          source_pcolls
-          | "FlattenPCollsFromPatterns" >> beam.Flatten()
-          | "ToRecordBatch"
-          >> beam.FlatMap(lambda table: table.to_batches(batch_size))
-          | "CollectRecordBatchTelemetry"
-          >> telemetry.ProfileRecordBatches(
-              self._telemetry_descriptors, _PARQUET_FORMAT, _PARQUET_FORMAT
-          )
-      )
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
-    raise NotImplementedError
-
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    raise NotImplementedError
-
-  def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
-    # ParquetTFXIO does not support attaching raw record column and therefore
-    # columns' schema does not contain the raw record column.
-    if self._schema is None:
-      return self._InferArrowSchema()
-
-    # If the column names are not passed, we default to all column names in the
-    # schema.
-    columns = self._column_names or [f.name for f in self._schema.feature]
-
-    return csv_decoder.GetArrowSchema(columns, self._schema)
-
-  def _InferArrowSchema(self) -> pa.Schema:
-    match_result = FileSystems.match(self._file_pattern)[0]
-    files_metadata = match_result.metadata_list[0]
-    with FileSystems.open(files_metadata.path) as f:
-      return pq.read_schema(f)
-
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    return tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
-        self._schema)
-
-  def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
-    """Returns a projected TFXIO.
-
-    Projection is pushed down to the Parquet Beam source.
-
-    The Projected TFXIO will project the record batches, arrow schema,
-    and the TFMD schema.
-
-    Args:
-      tensor_names: The columns to project.
-    """
-    projected_schema = (
-        tensor_representation_util.ProjectTensorRepresentationsInSchema(
-            self._schema, tensor_names))
-    result = copy.copy(self)
-
-    result._column_names = [f.name for f in projected_schema.feature]  # pylint: disable=protected-access
-    result._schema = projected_schema  # pylint: disable=protected-access
-    return result
+    """TFXIO implementation for Parquet."""
+
+    def __init__(
+        self,
+        file_pattern: Union[str, List[str]],
+        *,
+        column_names: Optional[List[str]] = None,
+        min_bundle_size: int = 0,
+        schema: Optional[schema_pb2.Schema] = None,
+        validate: bool = True,
+        telemetry_descriptors: Optional[List[str]] = None,
+    ):
+        """Initializes a Parquet TFXIO.
+
+        Args:
+        ----
+          file_pattern: One or a list of file glob patterns to read parquet files
+            from.
+          column_names: List of column names to read from the parquet files.
+          min_bundle_size: the minimum size in bytes, to be considered when
+            splitting the parquet input into bundles. If not provided, all columns
+            in the dataset will be read.
+          schema: An optional TFMD Schema describing the dataset. If schema is
+            provided, it will determine the data type of the parquet columns.
+            Otherwise, the each column's data type will be inferred by the decoder.
+          validate: Boolean flag to verify that the files exist during the pipeline
+            creation time.
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+        """
+        super().__init__(
+            telemetry_descriptors=telemetry_descriptors,
+            raw_record_column_name=None,
+            logical_format=_PARQUET_FORMAT,
+            physical_format=_PARQUET_FORMAT,
+        )
+        self._file_pattern = (
+            file_pattern if isinstance(file_pattern, list) else [file_pattern]
+        )
+        self._column_names = column_names
+        self._min_bundle_size = min_bundle_size
+        self._validate = validate
+        self._schema = schema
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        @beam.typehints.with_output_types(bytes)
+        def _PTransformFn(
+            pcoll_or_pipeline: Union[beam.PCollection, beam.Pipeline],
+        ) -> beam.PCollection[bytes]:
+            """Reads Parquet records and serializes to bytes."""
+            source_pcolls = []
+            for i, pattern in enumerate(self._file_pattern):
+                source_pcolls.append(
+                    pcoll_or_pipeline
+                    | f"ReadFromParquet[{i}]"
+                    >> beam.io.ReadFromParquet(
+                        file_pattern=pattern,
+                        min_bundle_size=self._min_bundle_size,
+                        validate=self._validate,
+                        columns=self._column_names,
+                    )
+                )
+            return (
+                source_pcolls
+                | "FlattenPCollsFromPatterns" >> beam.Flatten()
+                | "EncodeRawRecords" >> beam.Map(pickle.dumps)
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def _RawRecordToRecordBatchInternal(
+        self, batch_size: Optional[int] = None
+    ) -> beam.PTransform:
+        @beam.typehints.with_input_types(bytes)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _PTransformFn(
+            raw_records_pcoll: beam.PCollection[bytes],
+        ) -> beam.PCollection[pa.RecordBatch]:
+            """Decodes raw records and converts them to RecordBatches."""
+            return (
+                raw_records_pcoll
+                | "DecodeRawRecords" >> beam.Map(pickle.loads)
+                | "Batch"
+                >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
+                | "ToRecordBatch"
+                >> beam.Map(_RecordDictsToRecordBatch, schema=self.ArrowSchema())
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
+        # We override the `RecordBasedTFXIO`s `BeamSource` that is a composition of
+        # `_RawRecordBeamSourceInternal` and `_RawRecordToRecordBatchInternal` with
+        # a more efficient implementation that uses batched read.
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _PTransformFn(
+            pcoll_or_pipeline: Union[beam.PCollection, beam.Pipeline],
+        ) -> beam.PCollection[pa.RecordBatch]:
+            """Reads Parquet tables and converts to RecordBatches."""
+            source_pcolls = []
+            for i, pattern in enumerate(self._file_pattern):
+                source_pcolls.append(
+                    pcoll_or_pipeline
+                    | f"ReadFromParquetBatched[{i}]"
+                    >> beam.io.ReadFromParquetBatched(
+                        file_pattern=pattern,
+                        min_bundle_size=self._min_bundle_size,
+                        validate=self._validate,
+                        columns=self._column_names,
+                    )
+                )
+            return (
+                source_pcolls
+                | "FlattenPCollsFromPatterns" >> beam.Flatten()
+                | "ToRecordBatch"
+                >> beam.FlatMap(lambda table: table.to_batches(batch_size))
+                | "CollectRecordBatchTelemetry"
+                >> telemetry.ProfileRecordBatches(
+                    self._telemetry_descriptors, _PARQUET_FORMAT, _PARQUET_FORMAT
+                )
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
+        raise NotImplementedError
+
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        raise NotImplementedError
+
+    def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
+        # ParquetTFXIO does not support attaching raw record column and therefore
+        # columns' schema does not contain the raw record column.
+        if self._schema is None:
+            return self._InferArrowSchema()
+
+        # If the column names are not passed, we default to all column names in the
+        # schema.
+        columns = self._column_names or [f.name for f in self._schema.feature]
+
+        return csv_decoder.GetArrowSchema(columns, self._schema)
+
+    def _InferArrowSchema(self) -> pa.Schema:
+        match_result = FileSystems.match(self._file_pattern)[0]
+        files_metadata = match_result.metadata_list[0]
+        with FileSystems.open(files_metadata.path) as f:
+            return pq.read_schema(f)
+
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        return tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
+            self._schema
+        )
+
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        """Returns a projected TFXIO.
+
+        Projection is pushed down to the Parquet Beam source.
+
+        The Projected TFXIO will project the record batches, arrow schema,
+        and the TFMD schema.
+
+        Args:
+        ----
+          tensor_names: The columns to project.
+        """
+        projected_schema = (
+            tensor_representation_util.ProjectTensorRepresentationsInSchema(
+                self._schema, tensor_names
+            )
+        )
+        result = copy.copy(self)
+
+        result._column_names = [f.name for f in projected_schema.feature]  # pylint: disable=protected-access
+        result._schema = projected_schema  # pylint: disable=protected-access
+        return result
diff --git a/tfx_bsl/tfxio/parquet_tfxio_test.py b/tfx_bsl/tfxio/parquet_tfxio_test.py
index d022decc..562d3ec7 100644
--- a/tfx_bsl/tfxio/parquet_tfxio_test.py
+++ b/tfx_bsl/tfxio/parquet_tfxio_test.py
@@ -14,31 +14,29 @@
 """Tests for tfx_bsl.tfxio.parquet_tfxio."""
 
 import os
-import pytest
 import pickle
 
-from absl import flags
 import apache_beam as beam
-from apache_beam.testing import util as beam_testing_util
 import pandas as pd
 import pyarrow as pa
 import pyarrow.parquet as pq
+import pytest
 import tensorflow as tf
-from tfx_bsl.tfxio import parquet_tfxio
-from tfx_bsl.tfxio import telemetry_test_util
-from tfx_bsl.tfxio import tensor_representation_util
-
-from google.protobuf import text_format
+from absl import flags
 from absl.testing import absltest
+from apache_beam.testing import util as beam_testing_util
+from google.protobuf import text_format
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.tfxio import parquet_tfxio, telemetry_test_util, tensor_representation_util
+
 FLAGS = flags.FLAGS
 _COLUMN_NAMES = ["int_feature", "float_feature", "string_feature"]
 _TELEMETRY_DESCRIPTORS = ["Some", "Component"]
 _ROWS = {
     "int_feature": [[1], [2]],
     "float_feature": [[2.0], [3.0]],
-    "string_feature": [["abc"], ["xyz"]]
+    "string_feature": [["abc"], ["xyz"]],
 }
 _NUM_ROWS = len(next(iter(_ROWS)))
 
@@ -68,7 +66,9 @@
     max: 2
   }
   }
-  """, schema_pb2.Schema())
+  """,
+    schema_pb2.Schema(),
+)
 
 _UNORDERED_SCHEMA = text_format.Parse(
     """
@@ -96,354 +96,385 @@
     max: 2
   }
   }
-  """, schema_pb2.Schema())
-
-_EXPECTED_ARROW_SCHEMA = pa.schema([
-    pa.field("int_feature", pa.large_list(pa.int64())),
-    pa.field("float_feature", pa.large_list(pa.float32())),
-    pa.field("string_feature", pa.large_list(pa.large_binary()))
-])
-
-_EXPECTED_PROJECTED_ARROW_SCHEMA = pa.schema([
-    pa.field("int_feature", pa.large_list(pa.int64())),
-    pa.field("float_feature", pa.large_list(pa.float32())),
-])
+  """,
+    schema_pb2.Schema(),
+)
+
+_EXPECTED_ARROW_SCHEMA = pa.schema(
+    [
+        pa.field("int_feature", pa.large_list(pa.int64())),
+        pa.field("float_feature", pa.large_list(pa.float32())),
+        pa.field("string_feature", pa.large_list(pa.large_binary())),
+    ]
+)
+
+_EXPECTED_PROJECTED_ARROW_SCHEMA = pa.schema(
+    [
+        pa.field("int_feature", pa.large_list(pa.int64())),
+        pa.field("float_feature", pa.large_list(pa.float32())),
+    ]
+)
 
 _EXPECTED_COLUMN_VALUES = {
-    "int_feature":
-        pa.array([[1], [2]], type=pa.large_list(pa.int64())),
-    "float_feature":
-        pa.array([[2.0], [3.0]], type=pa.large_list(pa.float32())),
-    "string_feature":
-        pa.array([[b"abc"], [b"xyz"]], type=pa.large_list(pa.large_binary())),
+    "int_feature": pa.array([[1], [2]], type=pa.large_list(pa.int64())),
+    "float_feature": pa.array([[2.0], [3.0]], type=pa.large_list(pa.float32())),
+    "string_feature": pa.array(
+        [[b"abc"], [b"xyz"]], type=pa.large_list(pa.large_binary())
+    ),
 }
 
 _EXPECTED_RAW_RECORDS = [
-    pickle.dumps({
-        "int_feature": [1],
-        "float_feature": [2.0],
-        "string_feature": [b"abc"],
-    }),
-    pickle.dumps({
-        "int_feature": [2],
-        "float_feature": [3.0],
-        "string_feature": [b"xyz"],
-    }),
+    pickle.dumps(
+        {
+            "int_feature": [1],
+            "float_feature": [2.0],
+            "string_feature": [b"abc"],
+        }
+    ),
+    pickle.dumps(
+        {
+            "int_feature": [2],
+            "float_feature": [3.0],
+            "string_feature": [b"xyz"],
+        }
+    ),
 ]
 
 
 def _WriteInputs(filename):
-  df = pd.DataFrame(_ROWS)
-  table = pa.Table.from_pandas(df, schema=_EXPECTED_ARROW_SCHEMA)
-  pq.write_table(table, filename)
+    df = pd.DataFrame(_ROWS)
+    table = pa.Table.from_pandas(df, schema=_EXPECTED_ARROW_SCHEMA)
+    pq.write_table(table, filename)
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class ParquetRecordTest(absltest.TestCase):
-
-  @classmethod
-  def setUpClass(cls):
-    super().setUpClass()
-    cls._example_file = os.path.join(FLAGS.test_tmpdir, "parquettest",
-                                     "examples.parquet")
-    tf.io.gfile.makedirs(os.path.dirname(cls._example_file))
-    _WriteInputs(cls._example_file)
-
-  def testImplicitTensorRepresentations(self):
-    """Tests inferring of tensor representation."""
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=_COLUMN_NAMES,
-        schema=_UNORDERED_SCHEMA,
-        telemetry_descriptors=_TELEMETRY_DESCRIPTORS)
-    self.assertEqual(
-        {
-            "int_feature":
-                text_format.Parse(
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls._example_file = os.path.join(
+            FLAGS.test_tmpdir, "parquettest", "examples.parquet"
+        )
+        tf.io.gfile.makedirs(os.path.dirname(cls._example_file))
+        _WriteInputs(cls._example_file)
+
+    def testImplicitTensorRepresentations(self):
+        """Tests inferring of tensor representation."""
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file,
+            column_names=_COLUMN_NAMES,
+            schema=_UNORDERED_SCHEMA,
+            telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+        )
+        self.assertEqual(
+            {
+                "int_feature": text_format.Parse(
                     """varlen_sparse_tensor { column_name: "int_feature"}""",
-                    schema_pb2.TensorRepresentation()),
-            "float_feature":
-                text_format.Parse(
+                    schema_pb2.TensorRepresentation(),
+                ),
+                "float_feature": text_format.Parse(
                     """varlen_sparse_tensor { column_name: "float_feature"}""",
-                    schema_pb2.TensorRepresentation()),
-            "string_feature":
-                text_format.Parse(
+                    schema_pb2.TensorRepresentation(),
+                ),
+                "string_feature": text_format.Parse(
                     """varlen_sparse_tensor { column_name: "string_feature" }""",
-                    schema_pb2.TensorRepresentation()),
-        }, tfxio.TensorRepresentations())
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 3)
-      self.assertIn("int_feature", dict_of_tensors)
-      self.assertIn("float_feature", dict_of_tensors)
-      self.assertIn("string_feature", dict_of_tensors)
-
-    pipeline = beam.Pipeline()
-    record_batch_pcoll = (pipeline | tfxio.BeamSource(batch_size=_NUM_ROWS))
-    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-    pipeline_result = pipeline.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(self, pipeline_result,
-                                        _TELEMETRY_DESCRIPTORS, "parquet",
-                                        "parquet")
-
-  def testExplicitTensorRepresentations(self):
-    """Tests when the tensor representation is explicitely provided in the schema."""
-    schema = schema_pb2.Schema()
-    schema.CopyFrom(_SCHEMA)
-    tensor_representations = {
-        "my_feature":
-            text_format.Parse(
+                    schema_pb2.TensorRepresentation(),
+                ),
+            },
+            tfxio.TensorRepresentations(),
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 3)
+            self.assertIn("int_feature", dict_of_tensors)
+            self.assertIn("float_feature", dict_of_tensors)
+            self.assertIn("string_feature", dict_of_tensors)
+
+        pipeline = beam.Pipeline()
+        record_batch_pcoll = pipeline | tfxio.BeamSource(batch_size=_NUM_ROWS)
+        beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+        pipeline_result = pipeline.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self, pipeline_result, _TELEMETRY_DESCRIPTORS, "parquet", "parquet"
+        )
+
+    def testExplicitTensorRepresentations(self):
+        """Tests when the tensor representation is explicitely provided in the schema."""
+        schema = schema_pb2.Schema()
+        schema.CopyFrom(_SCHEMA)
+        tensor_representations = {
+            "my_feature": text_format.Parse(
                 """
           dense_tensor {
            column_name: "string_feature"
            shape { dim { size: 1 } }
            default_value { bytes_value: "abc" }
-         }""", schema_pb2.TensorRepresentation())
-    }
-    schema.tensor_representation_group[""].CopyFrom(
-        schema_pb2.TensorRepresentationGroup(
-            tensor_representation=tensor_representations))
-
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=_COLUMN_NAMES,
-        schema=schema,
-        telemetry_descriptors=_TELEMETRY_DESCRIPTORS)
-
-    expected_tensor_representations = {
-        "int_feature":
-            text_format.Parse(
+         }""",
+                schema_pb2.TensorRepresentation(),
+            )
+        }
+        schema.tensor_representation_group[""].CopyFrom(
+            schema_pb2.TensorRepresentationGroup(
+                tensor_representation=tensor_representations
+            )
+        )
+
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file,
+            column_names=_COLUMN_NAMES,
+            schema=schema,
+            telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+        )
+
+        expected_tensor_representations = {
+            "int_feature": text_format.Parse(
                 """varlen_sparse_tensor { column_name: "int_feature"}""",
-                schema_pb2.TensorRepresentation()),
-        "float_feature":
-            text_format.Parse(
+                schema_pb2.TensorRepresentation(),
+            ),
+            "float_feature": text_format.Parse(
                 """varlen_sparse_tensor { column_name: "float_feature"}""",
-                schema_pb2.TensorRepresentation()),
-    }
-    expected_tensor_representations.update(tensor_representations)
-    self.assertEqual(expected_tensor_representations,
-                     tfxio.TensorRepresentations())
-
-  def testReadMultipleFiles(self):
-    """Tests multiple file input patterns."""
-    num_copies = 3
-    # The TFXIO will create multiple read nodes for the file.
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=[self._example_file] * num_copies,
-        column_names=_COLUMN_NAMES,
-        schema=_SCHEMA)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, num_copies)
-      for record_batch in record_batch_list:
-        self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (p | tfxio.BeamSource(batch_size=_NUM_ROWS))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testReadRawRecords(self):
-    """Tests reading raw records."""
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=_COLUMN_NAMES,
-        schema=_SCHEMA)
-
-    with beam.Pipeline() as p:
-      raw_record_pcoll = (p | tfxio.RawRecordBeamSource())
-      beam_testing_util.assert_that(
-          raw_record_pcoll, beam_testing_util.equal_to(_EXPECTED_RAW_RECORDS))
-
-  def testReadRawRecordsMultipleFiles(self):
-    """Tests reading raw records from multiple files."""
-    num_copies = 3
-    # The TFXIO will create multiple read nodes for the file.
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=[self._example_file] * num_copies,
-        column_names=_COLUMN_NAMES,
-        schema=_SCHEMA)
-
-    with beam.Pipeline() as p:
-      raw_record_pcoll = (p | tfxio.RawRecordBeamSource())
-      beam_testing_util.assert_that(
-          raw_record_pcoll,
-          beam_testing_util.equal_to(_EXPECTED_RAW_RECORDS * num_copies))
-
-  def testReadRawRecordsVsBeamSource(self):
-    """Tests reading of raw records and conversion to RecordBatches."""
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=_COLUMN_NAMES,
-        schema=_SCHEMA)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      for record_batch in record_batch_list:
-        self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (
-          p | "ReadRawRecords" >> tfxio.RawRecordBeamSource()
-          |
-          "RawRecordsToRecordBatch" >> tfxio.RawRecordToRecordBatch(_NUM_ROWS))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testProjection(self):
-    """Test projecting of a TFXIO."""
-    schema = schema_pb2.Schema()
-    schema.CopyFrom(_UNORDERED_SCHEMA)
-    tensor_representations = {
-        "string_tensor":
-            schema_pb2.TensorRepresentation(
+                schema_pb2.TensorRepresentation(),
+            ),
+        }
+        expected_tensor_representations.update(tensor_representations)
+        self.assertEqual(expected_tensor_representations, tfxio.TensorRepresentations())
+
+    def testReadMultipleFiles(self):
+        """Tests multiple file input patterns."""
+        num_copies = 3
+        # The TFXIO will create multiple read nodes for the file.
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=[self._example_file] * num_copies,
+            column_names=_COLUMN_NAMES,
+            schema=_SCHEMA,
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, num_copies)
+            for record_batch in record_batch_list:
+                self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=_NUM_ROWS)
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testReadRawRecords(self):
+        """Tests reading raw records."""
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file, column_names=_COLUMN_NAMES, schema=_SCHEMA
+        )
+
+        with beam.Pipeline() as p:
+            raw_record_pcoll = p | tfxio.RawRecordBeamSource()
+            beam_testing_util.assert_that(
+                raw_record_pcoll, beam_testing_util.equal_to(_EXPECTED_RAW_RECORDS)
+            )
+
+    def testReadRawRecordsMultipleFiles(self):
+        """Tests reading raw records from multiple files."""
+        num_copies = 3
+        # The TFXIO will create multiple read nodes for the file.
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=[self._example_file] * num_copies,
+            column_names=_COLUMN_NAMES,
+            schema=_SCHEMA,
+        )
+
+        with beam.Pipeline() as p:
+            raw_record_pcoll = p | tfxio.RawRecordBeamSource()
+            beam_testing_util.assert_that(
+                raw_record_pcoll,
+                beam_testing_util.equal_to(_EXPECTED_RAW_RECORDS * num_copies),
+            )
+
+    def testReadRawRecordsVsBeamSource(self):
+        """Tests reading of raw records and conversion to RecordBatches."""
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file, column_names=_COLUMN_NAMES, schema=_SCHEMA
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            for record_batch in record_batch_list:
+                self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = (
+                p
+                | "ReadRawRecords" >> tfxio.RawRecordBeamSource()
+                | "RawRecordsToRecordBatch" >> tfxio.RawRecordToRecordBatch(_NUM_ROWS)
+            )
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testProjection(self):
+        """Test projecting of a TFXIO."""
+        schema = schema_pb2.Schema()
+        schema.CopyFrom(_UNORDERED_SCHEMA)
+        tensor_representations = {
+            "string_tensor": schema_pb2.TensorRepresentation(
                 dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
-                    column_name="string_feature")),
-        "float_tensor":
-            schema_pb2.TensorRepresentation(
+                    column_name="string_feature"
+                )
+            ),
+            "float_tensor": schema_pb2.TensorRepresentation(
                 sparse_tensor=schema_pb2.TensorRepresentation.SparseTensor(
                     dense_shape=schema_pb2.FixedShape(
-                        dim=[schema_pb2.FixedShape.Dim(size=10)]),
+                        dim=[schema_pb2.FixedShape.Dim(size=10)]
+                    ),
                     index_column_names=["int_feature"],
-                    value_column_name="float_feature")),
-    }
-    tensor_representation_util.SetTensorRepresentationsInSchema(
-        schema, tensor_representations)
-
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=_COLUMN_NAMES,
-        schema=schema,
-        telemetry_descriptors=_TELEMETRY_DESCRIPTORS)
-
-    projected_tfxio = tfxio.Project(["float_tensor"])
-
-    # The projected_tfxio has the projected schema
-    self.assertTrue(
-        projected_tfxio.ArrowSchema().equals(_EXPECTED_PROJECTED_ARROW_SCHEMA))
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, _EXPECTED_PROJECTED_ARROW_SCHEMA)
-      expected_schema = projected_tfxio.ArrowSchema()
-      self.assertListEqual(
-          record_batch.schema.names, expected_schema.names,
-          "actual: {}; expected: {}".format(record_batch.schema.names,
-                                            expected_schema.names))
-      self.assertListEqual(
-          record_batch.schema.types, expected_schema.types,
-          "actual: {}; expected: {}".format(record_batch.schema.types,
-                                            expected_schema.types))
-      tensor_adapter = projected_tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 1)
-      self.assertIn("float_tensor", dict_of_tensors)
-
-    pipeline = beam.Pipeline()
-    record_batch_pcoll = (
-        pipeline | projected_tfxio.BeamSource(batch_size=_NUM_ROWS))
-    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-    pipeline_result = pipeline.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(self, pipeline_result,
-                                        _TELEMETRY_DESCRIPTORS, "parquet",
-                                        "parquet")
-
-  def testOptionalSchema(self):
-    """Tests when the schema is not provided."""
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=_COLUMN_NAMES,
-        telemetry_descriptors=_TELEMETRY_DESCRIPTORS)
-
-    self.assertEqual(tfxio.ArrowSchema(), _EXPECTED_ARROW_SCHEMA)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
-
-    pipeline = beam.Pipeline()
-    record_batch_pcoll = (pipeline | tfxio.BeamSource(batch_size=_NUM_ROWS))
-    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-    pipeline_result = pipeline.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(self, pipeline_result,
-                                        _TELEMETRY_DESCRIPTORS, "parquet",
-                                        "parquet")
-
-  def testUnorderedSchema(self):
-    """Tests various valid schemas."""
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=_COLUMN_NAMES,
-        schema=_UNORDERED_SCHEMA)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (p | tfxio.BeamSource(batch_size=_NUM_ROWS))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testOptionalColumnNames(self):
-    """Tests when column names are not provided."""
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file, schema=_SCHEMA)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (p | tfxio.BeamSource(batch_size=_NUM_ROWS))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testOptionalColumnNamesAndSchema(self):
-    """Tests when schema and column names are not provided."""
-    tfxio = parquet_tfxio.ParquetTFXIO(file_pattern=self._example_file)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (p | tfxio.BeamSource(batch_size=_NUM_ROWS))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testSubsetOfColumnNamesWithCompleteSchema(self):
-    """Tests when column names is a subset of schema features."""
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=["int_feature"],
-        schema=_SCHEMA)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      expected_arrow_schema = pa.schema([
-          pa.field("int_feature", pa.large_list(pa.int64())),
-      ])
-      self._ValidateRecordBatch(record_batch, expected_arrow_schema)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (p | tfxio.BeamSource(batch_size=_NUM_ROWS))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testSubsetOfColumnNamesWithSubsetSchema(self):
-    """Tests when column names and schema features are a subset of columns."""
-    schema = text_format.Parse(
-        """
+                    value_column_name="float_feature",
+                )
+            ),
+        }
+        tensor_representation_util.SetTensorRepresentationsInSchema(
+            schema, tensor_representations
+        )
+
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file,
+            column_names=_COLUMN_NAMES,
+            schema=schema,
+            telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+        )
+
+        projected_tfxio = tfxio.Project(["float_tensor"])
+
+        # The projected_tfxio has the projected schema
+        self.assertTrue(
+            projected_tfxio.ArrowSchema().equals(_EXPECTED_PROJECTED_ARROW_SCHEMA)
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, _EXPECTED_PROJECTED_ARROW_SCHEMA)
+            expected_schema = projected_tfxio.ArrowSchema()
+            self.assertListEqual(
+                record_batch.schema.names,
+                expected_schema.names,
+                f"actual: {record_batch.schema.names}; expected: {expected_schema.names}",
+            )
+            self.assertListEqual(
+                record_batch.schema.types,
+                expected_schema.types,
+                f"actual: {record_batch.schema.types}; expected: {expected_schema.types}",
+            )
+            tensor_adapter = projected_tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 1)
+            self.assertIn("float_tensor", dict_of_tensors)
+
+        pipeline = beam.Pipeline()
+        record_batch_pcoll = pipeline | projected_tfxio.BeamSource(batch_size=_NUM_ROWS)
+        beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+        pipeline_result = pipeline.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self, pipeline_result, _TELEMETRY_DESCRIPTORS, "parquet", "parquet"
+        )
+
+    def testOptionalSchema(self):
+        """Tests when the schema is not provided."""
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file,
+            column_names=_COLUMN_NAMES,
+            telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+        )
+
+        self.assertEqual(tfxio.ArrowSchema(), _EXPECTED_ARROW_SCHEMA)
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
+
+        pipeline = beam.Pipeline()
+        record_batch_pcoll = pipeline | tfxio.BeamSource(batch_size=_NUM_ROWS)
+        beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+        pipeline_result = pipeline.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self, pipeline_result, _TELEMETRY_DESCRIPTORS, "parquet", "parquet"
+        )
+
+    def testUnorderedSchema(self):
+        """Tests various valid schemas."""
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file,
+            column_names=_COLUMN_NAMES,
+            schema=_UNORDERED_SCHEMA,
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=_NUM_ROWS)
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testOptionalColumnNames(self):
+        """Tests when column names are not provided."""
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file, schema=_SCHEMA
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=_NUM_ROWS)
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testOptionalColumnNamesAndSchema(self):
+        """Tests when schema and column names are not provided."""
+        tfxio = parquet_tfxio.ParquetTFXIO(file_pattern=self._example_file)
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, _EXPECTED_ARROW_SCHEMA)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=_NUM_ROWS)
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testSubsetOfColumnNamesWithCompleteSchema(self):
+        """Tests when column names is a subset of schema features."""
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file,
+            column_names=["int_feature"],
+            schema=_SCHEMA,
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            expected_arrow_schema = pa.schema(
+                [
+                    pa.field("int_feature", pa.large_list(pa.int64())),
+                ]
+            )
+            self._ValidateRecordBatch(record_batch, expected_arrow_schema)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=_NUM_ROWS)
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testSubsetOfColumnNamesWithSubsetSchema(self):
+        """Tests when column names and schema features are a subset of columns."""
+        schema = text_format.Parse(
+            """
       feature {
       name: "int_feature"
       type: INT
@@ -452,46 +483,50 @@ def testSubsetOfColumnNamesWithSubsetSchema(self):
         max: 2
       }
       }
-      """, schema_pb2.Schema())
-
-    tfxio = parquet_tfxio.ParquetTFXIO(
-        file_pattern=self._example_file,
-        column_names=["int_feature"],
-        schema=schema)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      expected_arrow_schema = pa.schema([
-          pa.field("int_feature", pa.large_list(pa.int64())),
-      ])
-      self._ValidateRecordBatch(record_batch, expected_arrow_schema)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (p | tfxio.BeamSource(batch_size=_NUM_ROWS))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def _ValidateRecordBatch(self, record_batch, expected_arrow_schema):
-    self.assertIsInstance(record_batch, pa.RecordBatch)
-    self.assertEqual(record_batch.num_rows, 2)
-    # When reading the parquet files and then transforming them to
-    # RecordBatches, metadata is populated, specifically the pandas metadata.
-    # We do not assert that metadata.
-    self.assertListEqual(
-        record_batch.schema.names, expected_arrow_schema.names,
-        "Expected: {} ; got {}".format(expected_arrow_schema.names,
-                                       record_batch.schema.names))
-    self.assertListEqual(
-        record_batch.schema.types, expected_arrow_schema.types,
-        "Expected: {} ; got {}".format(expected_arrow_schema.types,
-                                       record_batch.schema.types))
-    for i, field in enumerate(record_batch.schema):
-      self.assertTrue(
-          record_batch.column(i).equals(_EXPECTED_COLUMN_VALUES[field.name]),
-          "Column {} did not match ({} vs {}).".format(
-              field.name, record_batch.column(i),
-              _EXPECTED_COLUMN_VALUES[field.name]))
+      """,
+            schema_pb2.Schema(),
+        )
+
+        tfxio = parquet_tfxio.ParquetTFXIO(
+            file_pattern=self._example_file, column_names=["int_feature"], schema=schema
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            expected_arrow_schema = pa.schema(
+                [
+                    pa.field("int_feature", pa.large_list(pa.int64())),
+                ]
+            )
+            self._ValidateRecordBatch(record_batch, expected_arrow_schema)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=_NUM_ROWS)
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def _ValidateRecordBatch(self, record_batch, expected_arrow_schema):
+        self.assertIsInstance(record_batch, pa.RecordBatch)
+        self.assertEqual(record_batch.num_rows, 2)
+        # When reading the parquet files and then transforming them to
+        # RecordBatches, metadata is populated, specifically the pandas metadata.
+        # We do not assert that metadata.
+        self.assertListEqual(
+            record_batch.schema.names,
+            expected_arrow_schema.names,
+            f"Expected: {expected_arrow_schema.names} ; got {record_batch.schema.names}",
+        )
+        self.assertListEqual(
+            record_batch.schema.types,
+            expected_arrow_schema.types,
+            f"Expected: {expected_arrow_schema.types} ; got {record_batch.schema.types}",
+        )
+        for i, field in enumerate(record_batch.schema):
+            self.assertTrue(
+                record_batch.column(i).equals(_EXPECTED_COLUMN_VALUES[field.name]),
+                f"Column {field.name} did not match ({record_batch.column(i)} vs {_EXPECTED_COLUMN_VALUES[field.name]}).",
+            )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tfxio/raw_tf_record.py b/tfx_bsl/tfxio/raw_tf_record.py
index 6c68b531..30e31e9b 100644
--- a/tfx_bsl/tfxio/raw_tf_record.py
+++ b/tfx_bsl/tfxio/raw_tf_record.py
@@ -18,175 +18,191 @@
 import apache_beam as beam
 import pyarrow as pa
 import tensorflow as tf  # pylint: disable=g-explicit-tensorflow-version-import
-from tfx_bsl.coders import batch_util
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import dataset_util
-from tfx_bsl.tfxio import record_based_tfxio
-from tfx_bsl.tfxio import tensor_adapter
-from tfx_bsl.tfxio import tfxio
-
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.coders import batch_util
+from tfx_bsl.tfxio import (
+    dataset_options,
+    dataset_util,
+    record_based_tfxio,
+    tensor_adapter,
+    tfxio,
+)
+
 
 class _RawRecordTFXIO(record_based_tfxio.RecordBasedTFXIO):
-  """Base class for raw record TFXIO implementations.
-
-  A raw record TFXIO decodes a record based on-disk format into an
-  RecordBatches of one column that contains the raw records. Its TensorAdapter
-  converts one such RecordBatch into a dense string tensor that contains
-  the raw records.
-
-  `raw_record_column_name` determines the name of the raw record column and
-  the tensor.
-  """
-
-  def __init__(self, raw_record_column_name: Text,
-               telemetry_descriptors: List[Text],
-               physical_format: Text):
-    assert raw_record_column_name is not None
-    super().__init__(
-        raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=telemetry_descriptors,
-        logical_format="bytes",
-        physical_format=physical_format)
-
-  def SupportAttachingRawRecords(self) -> bool:
-    return True
-
-  def _RawRecordToRecordBatchInternal(self,
-                                      batch_size: Optional[int] = None
-                                     ) -> beam.PTransform:
-
-    @beam.typehints.with_input_types(bytes)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _PTransformFn(raw_record_pcoll: beam.pvalue.PCollection):
-      return (
-          raw_record_pcoll
-          | "Batch"
-          >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
-          | "ToRecordBatch"
-          >> beam.Map(_BatchedRecordsToArrow, self.raw_record_column_name)
-      )
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
-    # The only column is the raw record column.
-    return pa.schema([])
-
-  def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
-    raise NotImplementedError
-
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    return {
-        self.raw_record_column_name
-        or "": schema_pb2.TensorRepresentation(
-            dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
-                column_name=self.raw_record_column_name,
-                shape=schema_pb2.FixedShape(),  # scalar
-            )
+    """Base class for raw record TFXIO implementations.
+
+    A raw record TFXIO decodes a record based on-disk format into an
+    RecordBatches of one column that contains the raw records. Its TensorAdapter
+    converts one such RecordBatch into a dense string tensor that contains
+    the raw records.
+
+    `raw_record_column_name` determines the name of the raw record column and
+    the tensor.
+    """
+
+    def __init__(
+        self,
+        raw_record_column_name: str,
+        telemetry_descriptors: List[str],
+        physical_format: str,
+    ):
+        assert raw_record_column_name is not None
+        super().__init__(
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=telemetry_descriptors,
+            logical_format="bytes",
+            physical_format=physical_format,
         )
-    }
 
-  def _ProjectImpl(self, tensor_names: List[Text]) -> tfxio.TFXIO:
-    assert (not tensor_names or
-            (len(tensor_names) == 1
-             and tensor_names[0] == self.raw_record_column_name)), (
-                 str(tensor_names))
-    return self
+    def SupportAttachingRawRecords(self) -> bool:
+        return True
+
+    def _RawRecordToRecordBatchInternal(
+        self, batch_size: Optional[int] = None
+    ) -> beam.PTransform:
+        @beam.typehints.with_input_types(bytes)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _PTransformFn(raw_record_pcoll: beam.pvalue.PCollection):
+            return (
+                raw_record_pcoll
+                | "Batch"
+                >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
+                | "ToRecordBatch"
+                >> beam.Map(_BatchedRecordsToArrow, self.raw_record_column_name)
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
+        # The only column is the raw record column.
+        return pa.schema([])
 
+    def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
+        raise NotImplementedError
+
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        return {
+            self.raw_record_column_name or "": schema_pb2.TensorRepresentation(
+                dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
+                    column_name=self.raw_record_column_name,
+                    shape=schema_pb2.FixedShape(),  # scalar
+                )
+            )
+        }
+
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        assert not tensor_names or (
+            len(tensor_names) == 1 and tensor_names[0] == self.raw_record_column_name
+        ), str(tensor_names)
+        return self
 
-def _BatchedRecordsToArrow(records: List[bytes],
-                           raw_record_column_name: Text) -> pa.RecordBatch:
-  raw_record_column = record_based_tfxio.CreateRawRecordColumn(records)
-  return pa.RecordBatch.from_arrays(
-      [raw_record_column], [raw_record_column_name])
+
+def _BatchedRecordsToArrow(
+    records: List[bytes], raw_record_column_name: str
+) -> pa.RecordBatch:
+    raw_record_column = record_based_tfxio.CreateRawRecordColumn(records)
+    return pa.RecordBatch.from_arrays([raw_record_column], [raw_record_column_name])
 
 
 class RawBeamRecordTFXIO(_RawRecordTFXIO):
-  """TFXIO for raw records in pcoll[bytes].
-
-  This is a special TFXIO that does not actually do I/O -- it relies on the
-  caller to prepare a PCollection of bytes.
-  """
-
-  def __init__(self,
-               physical_format: Text,
-               raw_record_column_name: Text,
-               telemetry_descriptors: List[Text]):
-    """Initializer.
-
-    Args:
-      physical_format: The physical format that describes where the input
-        pcoll[bytes] comes from. Used for telemetry purposes. Examples: "text",
-        "tfrecord".
-      raw_record_column_name: If not None, the generated Arrow RecordBatches
-        will contain a column of the given name that contains serialized
-        records.
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
+    """TFXIO for raw records in pcoll[bytes].
+
+    This is a special TFXIO that does not actually do I/O -- it relies on the
+    caller to prepare a PCollection of bytes.
     """
-    super().__init__(
-        telemetry_descriptors=telemetry_descriptors,
-        physical_format=physical_format,
-        raw_record_column_name=raw_record_column_name)
 
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return (beam.ptransform_fn(lambda x: x)()
+    def __init__(
+        self,
+        physical_format: str,
+        raw_record_column_name: str,
+        telemetry_descriptors: List[str],
+    ):
+        """Initializer.
+
+        Args:
+        ----
+          physical_format: The physical format that describes where the input
+            pcoll[bytes] comes from. Used for telemetry purposes. Examples: "text",
+            "tfrecord".
+          raw_record_column_name: If not None, the generated Arrow RecordBatches
+            will contain a column of the given name that contains serialized
+            records.
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+        """
+        super().__init__(
+            telemetry_descriptors=telemetry_descriptors,
+            physical_format=physical_format,
+            raw_record_column_name=raw_record_column_name,
+        )
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return (
+            beam.ptransform_fn(lambda x: x)()
             .with_input_types(bytes)
-            .with_output_types(bytes))
+            .with_output_types(bytes)
+        )
 
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    raise NotImplementedError
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        raise NotImplementedError
 
 
 class RawTfRecordTFXIO(_RawRecordTFXIO):
-  """Raw record TFXIO for TFRecord format."""
-
-  def __init__(self, file_pattern: Union[Text, List[Text]],
-               raw_record_column_name: Text,
-               telemetry_descriptors: List[Text]):
-    """Initializer.
-
-    Args:
-      file_pattern: One or a list of glob patterns. If a list, must not be
-        empty.
-      raw_record_column_name: Name of the raw record column.
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-    """
-    super().__init__(
-        telemetry_descriptors=telemetry_descriptors,
-        physical_format="tfrecords_gzip",
-        raw_record_column_name=raw_record_column_name)
-    if not isinstance(file_pattern, list):
-      file_pattern = [file_pattern]
-    assert file_pattern, "Must provide at least one file pattern."
-    self._file_pattern = file_pattern
-
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return record_based_tfxio.ReadTfRecord(self._file_pattern)
-
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-
-    return (dataset_util.make_tf_record_dataset(
-        file_pattern=self._file_pattern,
-        batch_size=options.batch_size,
-        drop_final_batch=options.drop_final_batch,
-        num_epochs=options.num_epochs,
-        shuffle=options.shuffle,
-        shuffle_buffer_size=options.shuffle_buffer_size,
-        shuffle_seed=options.shuffle_seed,
-        sloppy_ordering=options.sloppy_ordering)
+    """Raw record TFXIO for TFRecord format."""
+
+    def __init__(
+        self,
+        file_pattern: Union[str, List[str]],
+        raw_record_column_name: str,
+        telemetry_descriptors: List[str],
+    ):
+        """Initializer.
+
+        Args:
+        ----
+          file_pattern: One or a list of glob patterns. If a list, must not be
+            empty.
+          raw_record_column_name: Name of the raw record column.
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+        """
+        super().__init__(
+            telemetry_descriptors=telemetry_descriptors,
+            physical_format="tfrecords_gzip",
+            raw_record_column_name=raw_record_column_name,
+        )
+        if not isinstance(file_pattern, list):
+            file_pattern = [file_pattern]
+        assert file_pattern, "Must provide at least one file pattern."
+        self._file_pattern = file_pattern
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return record_based_tfxio.ReadTfRecord(self._file_pattern)
+
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        return (
+            dataset_util.make_tf_record_dataset(
+                file_pattern=self._file_pattern,
+                batch_size=options.batch_size,
+                drop_final_batch=options.drop_final_batch,
+                num_epochs=options.num_epochs,
+                shuffle=options.shuffle,
+                shuffle_buffer_size=options.shuffle_buffer_size,
+                shuffle_seed=options.shuffle_seed,
+                sloppy_ordering=options.sloppy_ordering,
+            )
             .map(lambda records: {self._raw_record_column_name: records})
-            .prefetch(buffer_size=tf.data.experimental.AUTOTUNE))
+            .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
+        )
diff --git a/tfx_bsl/tfxio/raw_tf_record_test.py b/tfx_bsl/tfxio/raw_tf_record_test.py
index 4ee0b104..bdffc5b7 100644
--- a/tfx_bsl/tfxio/raw_tf_record_test.py
+++ b/tfx_bsl/tfxio/raw_tf_record_test.py
@@ -14,157 +14,170 @@
 """Tests for tfx_bsl.tfxio.raw_tf_record."""
 
 import os
-import pytest
 import unittest
 
-from absl import flags
 import apache_beam as beam
-from apache_beam.testing import util as beam_testing_util
 import pyarrow as pa
+import pytest
 import tensorflow as tf  # pylint: disable=g-explicit-tensorflow-version-import
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import raw_tf_record
-from tfx_bsl.tfxio import telemetry_test_util
-
+from absl import flags
 from absl.testing import absltest
+from apache_beam.testing import util as beam_testing_util
 
+from tfx_bsl.tfxio import dataset_options, raw_tf_record, telemetry_test_util
 
 FLAGS = flags.FLAGS
 _RAW_RECORDS = [b"record1", b"record2", b"record3"]
 
 
 def _WriteRawRecords(filename):
-  with tf.io.TFRecordWriter(filename, "GZIP") as w:
-    for r in _RAW_RECORDS:
-      w.write(r)
+    with tf.io.TFRecordWriter(filename, "GZIP") as w:
+        for r in _RAW_RECORDS:
+            w.write(r)
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class RawTfRecordTest(absltest.TestCase):
-
-  @classmethod
-  def setUpClass(cls):
-    super().setUpClass()
-    cls._raw_record_file = os.path.join(
-        FLAGS.test_tmpdir, "rawtfrecordtest", "input.recordio.gz")
-    tf.io.gfile.makedirs(os.path.dirname(cls._raw_record_file))
-    _WriteRawRecords(cls._raw_record_file)
-
-  def testRecordBatchAndTensorAdapter(self):
-    column_name = "raw_record"
-    telemetry_descriptors = ["some", "component"]
-    tfxio = raw_tf_record.RawTfRecordTFXIO(
-        self._raw_record_file, column_name,
-        telemetry_descriptors=telemetry_descriptors)
-    expected_type = pa.large_list(pa.large_binary())
-
-    got_schema = tfxio.ArrowSchema()
-    self.assertTrue(got_schema.equals(
-        pa.schema([pa.field(column_name, expected_type)])),
-                    "got: {}".format(got_schema))
-
-    def _AssertFn(record_batches):
-      self.assertLen(record_batches, 1)
-      record_batch = record_batches[0]
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      self.assertTrue(record_batch.columns[0].equals(
-          pa.array([[r] for r in _RAW_RECORDS], type=expected_type)))
-      tensor_adapter = tfxio.TensorAdapter()
-      tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(tensors, 1)
-      self.assertIn(column_name, tensors)
-
-    p = beam.Pipeline()
-    record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_RAW_RECORDS))
-    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-    pipeline_result = p.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(self, pipeline_result,
-                                        telemetry_descriptors, "bytes",
-                                        "tfrecords_gzip")
-
-  def testProject(self):
-    column_name = "raw_record"
-    tfxio = raw_tf_record.RawTfRecordTFXIO(
-        self._raw_record_file,
-        column_name,
-        telemetry_descriptors=["some", "component"])
-    projected = tfxio.Project([column_name])
-    self.assertTrue(tfxio.ArrowSchema().equals(projected.ArrowSchema()))
-    self.assertEqual(tfxio.TensorRepresentations(),
-                     projected.TensorRepresentations())
-
-    with self.assertRaises(AssertionError):
-      tfxio.Project(["some_other_name"])
-
-  @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
-  def testTensorFlowDataset(self):
-    column_name = "raw_record"
-    tfxio = raw_tf_record.RawTfRecordTFXIO(
-        self._raw_record_file,
-        column_name,
-        telemetry_descriptors=["some", "component"])
-    ds = tfxio.TensorFlowDataset(dataset_options.TensorFlowDatasetOptions(
-        batch_size=1,
-        shuffle=False,
-        num_epochs=1,
-        reader_num_threads=1,
-        sloppy_ordering=False))
-    actual_records = [d[column_name].numpy()[0] for d in ds]
-    self.assertEqual(actual_records, _RAW_RECORDS)
-
-  def testTensorFlowDatasetGraphMode(self):
-    column_name = "raw_record"
-    tfxio = raw_tf_record.RawTfRecordTFXIO(
-        self._raw_record_file,
-        column_name,
-        telemetry_descriptors=["some", "component"])
-    actual_records = []
-    with tf.compat.v1.Graph().as_default():
-      ds = tfxio.TensorFlowDataset(
-          dataset_options.TensorFlowDatasetOptions(
-              batch_size=1,
-              shuffle=False,
-              num_epochs=1,
-              reader_num_threads=1,
-              sloppy_ordering=False))
-      iterator = tf.compat.v1.data.make_one_shot_iterator(ds)
-      next_elem = iterator.get_next()
-      with tf.compat.v1.Session() as sess:
-        while True:
-          try:
-            actual_records.append(sess.run(next_elem)[column_name][0])
-          except tf.errors.OutOfRangeError:
-            break
-    self.assertEqual(actual_records, _RAW_RECORDS)
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls._raw_record_file = os.path.join(
+            FLAGS.test_tmpdir, "rawtfrecordtest", "input.recordio.gz"
+        )
+        tf.io.gfile.makedirs(os.path.dirname(cls._raw_record_file))
+        _WriteRawRecords(cls._raw_record_file)
+
+    def testRecordBatchAndTensorAdapter(self):
+        column_name = "raw_record"
+        telemetry_descriptors = ["some", "component"]
+        tfxio = raw_tf_record.RawTfRecordTFXIO(
+            self._raw_record_file,
+            column_name,
+            telemetry_descriptors=telemetry_descriptors,
+        )
+        expected_type = pa.large_list(pa.large_binary())
+
+        got_schema = tfxio.ArrowSchema()
+        self.assertTrue(
+            got_schema.equals(pa.schema([pa.field(column_name, expected_type)])),
+            f"got: {got_schema}",
+        )
+
+        def _AssertFn(record_batches):
+            self.assertLen(record_batches, 1)
+            record_batch = record_batches[0]
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            self.assertTrue(
+                record_batch.columns[0].equals(
+                    pa.array([[r] for r in _RAW_RECORDS], type=expected_type)
+                )
+            )
+            tensor_adapter = tfxio.TensorAdapter()
+            tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(tensors, 1)
+            self.assertIn(column_name, tensors)
+
+        p = beam.Pipeline()
+        record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_RAW_RECORDS))
+        beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+        pipeline_result = p.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self, pipeline_result, telemetry_descriptors, "bytes", "tfrecords_gzip"
+        )
+
+    def testProject(self):
+        column_name = "raw_record"
+        tfxio = raw_tf_record.RawTfRecordTFXIO(
+            self._raw_record_file,
+            column_name,
+            telemetry_descriptors=["some", "component"],
+        )
+        projected = tfxio.Project([column_name])
+        self.assertTrue(tfxio.ArrowSchema().equals(projected.ArrowSchema()))
+        self.assertEqual(
+            tfxio.TensorRepresentations(), projected.TensorRepresentations()
+        )
+
+        with self.assertRaises(AssertionError):
+            tfxio.Project(["some_other_name"])
+
+    @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
+    def testTensorFlowDataset(self):
+        column_name = "raw_record"
+        tfxio = raw_tf_record.RawTfRecordTFXIO(
+            self._raw_record_file,
+            column_name,
+            telemetry_descriptors=["some", "component"],
+        )
+        ds = tfxio.TensorFlowDataset(
+            dataset_options.TensorFlowDatasetOptions(
+                batch_size=1,
+                shuffle=False,
+                num_epochs=1,
+                reader_num_threads=1,
+                sloppy_ordering=False,
+            )
+        )
+        actual_records = [d[column_name].numpy()[0] for d in ds]
+        self.assertEqual(actual_records, _RAW_RECORDS)
+
+    def testTensorFlowDatasetGraphMode(self):
+        column_name = "raw_record"
+        tfxio = raw_tf_record.RawTfRecordTFXIO(
+            self._raw_record_file,
+            column_name,
+            telemetry_descriptors=["some", "component"],
+        )
+        actual_records = []
+        with tf.compat.v1.Graph().as_default():
+            ds = tfxio.TensorFlowDataset(
+                dataset_options.TensorFlowDatasetOptions(
+                    batch_size=1,
+                    shuffle=False,
+                    num_epochs=1,
+                    reader_num_threads=1,
+                    sloppy_ordering=False,
+                )
+            )
+            iterator = tf.compat.v1.data.make_one_shot_iterator(ds)
+            next_elem = iterator.get_next()
+            with tf.compat.v1.Session() as sess:
+                while True:
+                    try:
+                        actual_records.append(sess.run(next_elem)[column_name][0])
+                    except tf.errors.OutOfRangeError:
+                        break
+        self.assertEqual(actual_records, _RAW_RECORDS)
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class RawBeamRecordTest(absltest.TestCase):
-
-  def testE2E(self):
-    column_name = "raw_record"
-    tfxio = raw_tf_record.RawBeamRecordTFXIO(
-        physical_format="inmem",
-        raw_record_column_name=column_name,
-        telemetry_descriptors=["some", "component"])
-
-    def _AssertFn(record_batches):
-      self.assertLen(record_batches, 1)
-      record_batch = record_batches[0]
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      tensor_adapter = tfxio.TensorAdapter()
-      tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(tensors, 1)
-      self.assertIn(column_name, tensors)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (
-          p
-          | "CreateInMemRecords" >> beam.Create(_RAW_RECORDS)
-          | "BeamSource" >> tfxio.BeamSource(batch_size=len(_RAW_RECORDS)))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+    def testE2E(self):
+        column_name = "raw_record"
+        tfxio = raw_tf_record.RawBeamRecordTFXIO(
+            physical_format="inmem",
+            raw_record_column_name=column_name,
+            telemetry_descriptors=["some", "component"],
+        )
+
+        def _AssertFn(record_batches):
+            self.assertLen(record_batches, 1)
+            record_batch = record_batches[0]
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            tensor_adapter = tfxio.TensorAdapter()
+            tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(tensors, 1)
+            self.assertIn(column_name, tensors)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = (
+                p
+                | "CreateInMemRecords" >> beam.Create(_RAW_RECORDS)
+                | "BeamSource" >> tfxio.BeamSource(batch_size=len(_RAW_RECORDS))
+            )
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tfxio/record_based_tfxio.py b/tfx_bsl/tfxio/record_based_tfxio.py
index 0d08cc45..3d45c2cc 100644
--- a/tfx_bsl/tfxio/record_based_tfxio.py
+++ b/tfx_bsl/tfxio/record_based_tfxio.py
@@ -23,364 +23,387 @@
 import numpy as np
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import telemetry
-from tfx_bsl.tfxio import tfxio
+
+from tfx_bsl.tfxio import dataset_options, telemetry, tfxio
 
 
 class RecordBasedTFXIO(tfxio.TFXIO):
-  """Base class for all TFXIO implementations for record-based on-disk formats.
-
-  `RecordBasedTFXIO` offers the following abstractions that are unique to
-  record-based formats:
-
-  `SupportAttachingRawRecords()`: indicates whether this implementation
-    supports attaching the raw records as a `LargeList<LargeBinary>` column to
-    the produced RecordBatches upon request. If a subclass implements this
-    feature, then its `RawRecordToRecordBatch()` must consult
-    `self.raw_record_column_name`, and make sure that the produced RecordBatches
-    have the raw record column as the last column, with the given name (if not
-    None); otherwise it's guaranteed that the raw record column is not
-    requested (`self.raw_record_column_name` == None).
-
-  `RawRecordBeamSource()`: returns a PTransform that produces PCollection[bytes]
-    (of raw records).
-
-  RawRecordToRecordBatch(): returns a PTransform that takes `PCollection[bytes]`
-    (expected to be what's produced by `RawRecordBeamSource()`) and produces
-    `PCollection[RecordBatch]`. It's guaranteed that `BeamSource()` is a
-    composition of `RawRecordBeamSource()` and `RawRecordToRecordBatch()`.
-    This interface is useful if one wants to access both the raw records as
-    well as the RecordBatches, because beam does not do Common Sub-expression
-    Eliminination, it's more desirable to be able to cache the output of
-    `RawRecordBeamSource()` and feed it to `RawRecordToRecordBatch()` than
-    calling `BeamSource()` separately as redundant disk reads can be avoided.
-  """
-
-  def __init__(self, telemetry_descriptors: Optional[List[Text]],
-               logical_format: Text,
-               physical_format: Text,
-               raw_record_column_name: Optional[Text] = None):
-    super().__init__()
-    if not self.SupportAttachingRawRecords():
-      assert raw_record_column_name is None, (
-          "{} did not support attaching raw records, but requested.".format(
-              type(self)))
-    self._telemetry_descriptors = telemetry_descriptors
-    self._logical_format = logical_format
-    self._physical_format = physical_format
-    self._raw_record_column_name = raw_record_column_name
-
-  @property
-  def raw_record_column_name(self) -> Optional[Text]:
-    return self._raw_record_column_name
-
-  @property
-  def telemetry_descriptors(self) -> Optional[List[Text]]:
-    return self._telemetry_descriptors
-
-  def SupportAttachingRawRecords(self) -> bool:
-    return False
-
-  def RawRecordBeamSource(self) -> beam.PTransform:
-    """Returns a PTransform that produces a PCollection[bytes].
-
-    Used together with RawRecordToRecordBatch(), it allows getting both the
-    PCollection of the raw records and the PCollection of the RecordBatch from
-    the same source. For example:
-
-    record_batch = pipeline | tfxio.BeamSource()
-    raw_record = pipeline | tfxio.RawRecordBeamSource()
-
-    would result in the files being read twice, while the following would only
-    read once:
-
-    raw_record = pipeline | tfxio.RawRecordBeamSource()
-    record_batch = raw_record | tfxio.RawRecordToRecordBatch()
+    """Base class for all TFXIO implementations for record-based on-disk formats.
+
+    `RecordBasedTFXIO` offers the following abstractions that are unique to
+    record-based formats:
+
+    `SupportAttachingRawRecords()`: indicates whether this implementation
+      supports attaching the raw records as a `LargeList<LargeBinary>` column to
+      the produced RecordBatches upon request. If a subclass implements this
+      feature, then its `RawRecordToRecordBatch()` must consult
+      `self.raw_record_column_name`, and make sure that the produced RecordBatches
+      have the raw record column as the last column, with the given name (if not
+      None); otherwise it's guaranteed that the raw record column is not
+      requested (`self.raw_record_column_name` == None).
+
+    `RawRecordBeamSource()`: returns a PTransform that produces PCollection[bytes]
+      (of raw records).
+
+    RawRecordToRecordBatch(): returns a PTransform that takes `PCollection[bytes]`
+      (expected to be what's produced by `RawRecordBeamSource()`) and produces
+      `PCollection[RecordBatch]`. It's guaranteed that `BeamSource()` is a
+      composition of `RawRecordBeamSource()` and `RawRecordToRecordBatch()`.
+      This interface is useful if one wants to access both the raw records as
+      well as the RecordBatches, because beam does not do Common Sub-expression
+      Eliminination, it's more desirable to be able to cache the output of
+      `RawRecordBeamSource()` and feed it to `RawRecordToRecordBatch()` than
+      calling `BeamSource()` separately as redundant disk reads can be avoided.
     """
 
-    @beam.typehints.with_input_types(Any)
-    @beam.typehints.with_output_types(bytes)
-    def _PTransformFn(pcoll_or_pipeline: Any):
-      return (pcoll_or_pipeline
-              | "ReadRawRecords" >> self._RawRecordBeamSourceInternal()
-              | "CollectRawRecordTelemetry" >> telemetry.ProfileRawRecords(
-                  self._telemetry_descriptors, self._logical_format,
-                  self._physical_format))
-
-    return beam.ptransform_fn(_PTransformFn)()
+    def __init__(
+        self,
+        telemetry_descriptors: Optional[List[str]],
+        logical_format: str,
+        physical_format: str,
+        raw_record_column_name: Optional[str] = None,
+    ):
+        super().__init__()
+        if not self.SupportAttachingRawRecords():
+            assert (
+                raw_record_column_name is None
+            ), f"{type(self)} did not support attaching raw records, but requested."
+        self._telemetry_descriptors = telemetry_descriptors
+        self._logical_format = logical_format
+        self._physical_format = physical_format
+        self._raw_record_column_name = raw_record_column_name
+
+    @property
+    def raw_record_column_name(self) -> Optional[str]:
+        return self._raw_record_column_name
+
+    @property
+    def telemetry_descriptors(self) -> Optional[List[str]]:
+        return self._telemetry_descriptors
+
+    def SupportAttachingRawRecords(self) -> bool:
+        return False
+
+    def RawRecordBeamSource(self) -> beam.PTransform:
+        """Returns a PTransform that produces a PCollection[bytes].
+
+        Used together with RawRecordToRecordBatch(), it allows getting both the
+        PCollection of the raw records and the PCollection of the RecordBatch from
+        the same source. For example:
+
+        record_batch = pipeline | tfxio.BeamSource()
+        raw_record = pipeline | tfxio.RawRecordBeamSource()
+
+        would result in the files being read twice, while the following would only
+        read once:
+
+        raw_record = pipeline | tfxio.RawRecordBeamSource()
+        record_batch = raw_record | tfxio.RawRecordToRecordBatch()
+        """
+
+        @beam.typehints.with_input_types(Any)
+        @beam.typehints.with_output_types(bytes)
+        def _PTransformFn(pcoll_or_pipeline: Any):
+            return (
+                pcoll_or_pipeline
+                | "ReadRawRecords" >> self._RawRecordBeamSourceInternal()
+                | "CollectRawRecordTelemetry"
+                >> telemetry.ProfileRawRecords(
+                    self._telemetry_descriptors,
+                    self._logical_format,
+                    self._physical_format,
+                )
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def RawRecordToRecordBatch(
+        self, batch_size: Optional[int] = None
+    ) -> beam.PTransform:
+        """Returns a PTransform that converts raw records to Arrow RecordBatches.
+
+        The input PCollection must be from self.RawRecordBeamSource() (also see
+        the documentation for that method).
+
+        Args:
+        ----
+          batch_size: if not None, the `pa.RecordBatch` produced will be of the
+            specified size. Otherwise it's automatically tuned by Beam.
+        """
+
+        @beam.typehints.with_input_types(bytes)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _PTransformFn(pcoll: beam.pvalue.PCollection):
+            return (
+                pcoll
+                | "RawRecordToRecordBatch"
+                >> self._RawRecordToRecordBatchInternal(batch_size)
+                | "CollectRecordBatchTelemetry"
+                >> telemetry.ProfileRecordBatches(
+                    self._telemetry_descriptors,
+                    self._logical_format,
+                    self._physical_format,
+                )
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    @abc.abstractmethod
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        """Returns a PTransform that produces a PCollection[bytes]."""
+
+    @abc.abstractmethod
+    def _RawRecordToRecordBatchInternal(
+        self, batch_size: Optional[int] = None
+    ) -> beam.PTransform:
+        """Returns a PTransform that converts raw records to Arrow RecordBatches."""
+        pass
+
+    @abc.abstractmethod
+    def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
+        """Returns the Arrow schema that does not contain the raw record column.
+
+        Even if self.raw_record_column is not None.
+
+        Returns
+        -------
+          a pa.Schema.
+        """
+        pass
+
+    def ArrowSchema(self) -> pa.Schema:
+        schema = self._ArrowSchemaNoRawRecordColumn()
+        if self._raw_record_column_name is not None:
+            if schema.get_field_index(self._raw_record_column_name) != -1:
+                raise ValueError(
+                    f"Raw record column name {self._raw_record_column_name} collided with a column in the schema."
+                )
+            schema = schema.append(
+                pa.field(self._raw_record_column_name, pa.large_list(pa.large_binary()))
+            )
+        return schema
+
+    def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
+        @beam.typehints.with_input_types(Any)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _PTransformFn(pcoll_or_pipeline: Any):
+            """Converts raw records to RecordBatches."""
+            return (
+                pcoll_or_pipeline
+                | "RawRecordBeamSource" >> self.RawRecordBeamSource()
+                | "RawRecordToRecordBatch" >> self.RawRecordToRecordBatch(batch_size)
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
+        raise NotImplementedError
+
+    def _PopLabelFeatureFromDataset(
+        self, dataset: tf.data.Dataset, label_key: str
+    ) -> tf.data.Dataset:
+        """Create a dataset that isolates the label_key feature from all features.
+
+        Args:
+        ----
+          dataset: A dataset of dicts, where the keys are feature names, and values
+            are features.
+          label_key: The name of the feature that is isolated.
+
+        Returns:
+        -------
+          A dataset of tuple (label_key_feature, features_dict), where features_dict
+          does not contain label_key.
+        """
+        if label_key not in self.TensorRepresentations():
+            raise ValueError(
+                f"The `label_key` provided ({label_key}) must be one of the following tensors"
+                f"names: {self.TensorRepresentations().keys()}."
+            )
+        return dataset.map(lambda x: (x, x.pop(label_key)))
+
+    def RawRecordTensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        """Returns a Dataset that contains nested Datasets of raw records.
+
+        May not be implemented for some TFXIOs.
+
+        This should be used when RawTfRecordTFXIO.TensorFlowDataset does not
+        suffice. Namely, if there is some logical grouping of files which we need
+        to perform operations on, without applying the operation to each individual
+        group (i.e. shuffle).
+
+        The returned Dataset object is a dataset of datasets, where each nested
+        dataset is a dataset of serialized records. When shuffle=False (default),
+        the nested datasets are deterministically ordered. Each nested dataset can
+        represent multiple files. The files are merged into one dataset if the files
+        have the same format. For example:
+
+        ```
+        file_patterns = ['file_1', 'file_2', 'dir_1/*']
+        file_formats = ['recordio', 'recordio', 'sstable']
+        tfxio = SomeTFXIO(file_patterns, file_formats)
+        datasets = tfxio.RawRecordTensorFlowDataset(options)
+        ```
+        `datasets` would result in the following dataset: `[ds1, ds2]`. Where ds1
+        iterates over records from 'file_1' and 'file_2', and ds2 iterates over
+        records from files matched by 'dir_1/*'.
+
+        Example usage:
+        ```
+        tfxio = SomeTFXIO(file_patterns, file_formats)
+        ds = tfxio.RawRecordTensorFlowDataset(options=options)
+        ds = ds.flat_map(lambda x: x)
+        records = list(ds.as_numpy_iterator())
+        # iterating over `records` yields records from the each file in
+        # `file_patterns`. See `tf.data.Dataset.list_files` for more information
+        # about the order of files when expanding globs.
+        ```
+        Note that we need a flat_map, because `RawRecordTensorFlowDataset` returns
+        a dataset of datasets.
+
+        When shuffle=True, then the datasets not deterministically ordered,
+        but the contents of each nested dataset are deterministcally ordered.
+        For example, we may potentially have [ds2, ds1, ds3], where the
+        contents of ds1, ds2, and ds3 are all deterministcally ordered.
+
+        Args:
+        ----
+          options: A TensorFlowDatasetOptions object. Not all options will apply.
+        """
+        raise NotImplementedError
+
+
+def CreateRawRecordColumn(raw_records: Union[np.ndarray, List[bytes]]) -> pa.Array:
+    """Returns an Array that satisfies the requirement of a raw record column."""
+    return pa.LargeListArray.from_arrays(
+        np.arange(0, len(raw_records) + 1, dtype=np.int64),
+        pa.array(raw_records, type=pa.large_binary()),
+    )
 
-  def RawRecordToRecordBatch(self,
-                             batch_size: Optional[int] = None
-                            ) -> beam.PTransform:
-    """Returns a PTransform that converts raw records to Arrow RecordBatches.
 
-    The input PCollection must be from self.RawRecordBeamSource() (also see
-    the documentation for that method).
+def AppendRawRecordColumn(
+    record_batch: pa.RecordBatch,
+    column_name: str,
+    raw_records: List[bytes],
+    record_index_column_name: Optional[str] = None,
+) -> pa.RecordBatch:
+    """Appends `raw_records` as a new column in `record_batch`.
 
     Args:
-      batch_size: if not None, the `pa.RecordBatch` produced will be of the
-        specified size. Otherwise it's automatically tuned by Beam.
-    """
-
-    @beam.typehints.with_input_types(bytes)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _PTransformFn(pcoll: beam.pvalue.PCollection):
-      return (pcoll
-              | "RawRecordToRecordBatch" >>
-              self._RawRecordToRecordBatchInternal(batch_size)
-              | "CollectRecordBatchTelemetry" >>
-              telemetry.ProfileRecordBatches(self._telemetry_descriptors,
-                                             self._logical_format,
-                                             self._physical_format))
+    ----
+      record_batch: The RecordBatch to append to.
+      column_name: The name of the column to be appended.
+      raw_records: A list of bytes to be appended.
+      record_index_column_name: If not specified, len(raw_records) must equal
+        to record_batch.num_rows. Otherwise, `record_batch` must contain an
+        list_like<integer> column to indicate which element in `raw_records`
+        is the source of a row in `record_batch`. Specifically,
+        record_index_column[i] == [j] means the i-th row came from the j-th
+        element in `raw_records`. This column must not contain nulls, and all
+        its elements must be single-element lists.
 
-    return beam.ptransform_fn(_PTransformFn)()
-
-  @abc.abstractmethod
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    """Returns a PTransform that produces a PCollection[bytes]."""
+    Returns:
+    -------
+      A new RecordBatch whose last column is the raw record column, of given name.
+    """
+    schema = record_batch.schema
+    if record_index_column_name is None:
+        assert record_batch.num_rows == len(
+            raw_records
+        ), f"num_rows: {record_batch.num_rows} vs len raw_records: {len(raw_records)}"
+    else:
+        record_index_column_index = schema.get_field_index(record_index_column_name)
+        assert (
+            record_index_column_index != -1
+        ), f"Record index column {record_index_column_name} did not exist."
+        record_index_column = record_batch.column(record_index_column_index)
+        assert (
+            record_index_column.null_count == 0
+        ), f"Record index column must not contain nulls: {record_index_column.null_count} nulls"
+        column_type = record_index_column.type
+        assert (
+            pa.types.is_list(column_type) or pa.types.is_large_list(column_type)
+        ) and pa.types.is_integer(column_type.value_type), (
+            f"Record index column {record_index_column_name} must be of type list_like<integer>, "
+            f"but got: {column_type}"
+        )
+        record_indices = np.asarray(record_index_column.flatten())
+        assert len(record_indices) == len(record_batch), (
+            "Record indices must be aligned with the record batch, but got "
+            f"different lengths: {len(record_indices)} vs {len(record_batch)}"
+        )
+        raw_records = np.asarray(raw_records, dtype=object)[record_indices]
+    assert schema.get_field_index(column_name) == -1
+    raw_record_column = CreateRawRecordColumn(raw_records)
+    return pa.RecordBatch.from_arrays(
+        list(record_batch.columns) + [raw_record_column],
+        list(schema.names) + [column_name],
+    )
 
-  @abc.abstractmethod
-  def _RawRecordToRecordBatchInternal(self,
-                                      batch_size: Optional[int] = None
-                                     ) -> beam.PTransform:
-    """Returns a PTransform that converts raw records to Arrow RecordBatches."""
-    pass
 
-  @abc.abstractmethod
-  def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
-    """Returns the Arrow schema that does not contain the raw record column.
+@beam.ptransform_fn
+@beam.typehints.with_input_types(beam.Pipeline)
+@beam.typehints.with_output_types(bytes)
+def ReadTfRecord(
+    pipeline: beam.Pipeline, file_pattern: List[str]
+) -> beam.pvalue.PCollection:
+    """A Beam source that reads multiple TFRecord file patterns."""
+    assert file_pattern, "Must provide at least one file pattern."
+    # TODO(b/162261470): consider using beam.io.tfrecordio.ReadAllFromTFRecord
+    # once the # concern over size estimation is addressed (also see
+    # b/161935932#comment13).
+    pcolls = []
+    for i, f in enumerate(file_pattern):
+        pcolls.append(
+            pipeline
+            | "ReadFromTFRecord[%d]" % i
+            >> beam.io.ReadFromTFRecord(f, coder=beam.coders.BytesCoder())
+        )
+
+    return pcolls | "FlattenPCollsFromPatterns" >> beam.Flatten()
 
-    Even if self.raw_record_column is not None.
 
-    Returns:
-      a pa.Schema.
-    """
-    pass
-
-  def ArrowSchema(self) -> pa.Schema:
-    schema = self._ArrowSchemaNoRawRecordColumn()
-    if self._raw_record_column_name is not None:
-      if schema.get_field_index(self._raw_record_column_name) != -1:
-        raise ValueError(
-            "Raw record column name {} collided with a column in the schema."
-            .format(self._raw_record_column_name))
-      schema = schema.append(
-          pa.field(self._raw_record_column_name,
-                   pa.large_list(pa.large_binary())))
-    return schema
-
-  def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
-
-    @beam.typehints.with_input_types(Any)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _PTransformFn(pcoll_or_pipeline: Any):
-      """Converts raw records to RecordBatches."""
-      return (
-          pcoll_or_pipeline
-          | "RawRecordBeamSource" >> self.RawRecordBeamSource()
-          | "RawRecordToRecordBatch" >> self.RawRecordToRecordBatch(batch_size))
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
-    raise NotImplementedError
-
-  def _PopLabelFeatureFromDataset(self, dataset: tf.data.Dataset,
-                                  label_key: Text) -> tf.data.Dataset:
-    """Create a dataset that isolates the label_key feature from all features.
+class OverridableRecordBasedTFXIO(RecordBasedTFXIO):
+    """A TFXIO that takes user defined `BeamSource`.
 
-    Args:
-      dataset: A dataset of dicts, where the keys are feature names, and values
-        are features.
-      label_key: The name of the feature that is isolated.
+    For a `RecordBasedTFXIO`, the `BeamSource` is comprised of two PTransforms --
+    `raw_record_beam_source` and `raw_record_to_record_batch`.
+    Both are required to be overridden by the user.
 
-    Returns:
-      A dataset of tuple (label_key_feature, features_dict), where features_dict
-      does not contain label_key.
+    raw_record_beam_source is a PTransform that produces raw records as a
+    PCollection[bytes].
+    raw_record_to_record_batch is a PTransform that converts raw records to
+    PCollection[RecordBatch] taking batch size as initialization argument.
     """
-    if label_key not in self.TensorRepresentations():
-      raise ValueError(
-          "The `label_key` provided ({}) must be one of the following tensors"
-          "names: {}.".format(label_key, self.TensorRepresentations().keys()))
-    return dataset.map(lambda x: (x, x.pop(label_key)))
-
-  def RawRecordTensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    """Returns a Dataset that contains nested Datasets of raw records.
-
-    May not be implemented for some TFXIOs.
-
-    This should be used when RawTfRecordTFXIO.TensorFlowDataset does not
-    suffice. Namely, if there is some logical grouping of files which we need
-    to perform operations on, without applying the operation to each individual
-    group (i.e. shuffle).
-
-    The returned Dataset object is a dataset of datasets, where each nested
-    dataset is a dataset of serialized records. When shuffle=False (default),
-    the nested datasets are deterministically ordered. Each nested dataset can
-    represent multiple files. The files are merged into one dataset if the files
-    have the same format. For example:
-
-    ```
-    file_patterns = ['file_1', 'file_2', 'dir_1/*']
-    file_formats = ['recordio', 'recordio', 'sstable']
-    tfxio = SomeTFXIO(file_patterns, file_formats)
-    datasets = tfxio.RawRecordTensorFlowDataset(options)
-    ```
-    `datasets` would result in the following dataset: `[ds1, ds2]`. Where ds1
-    iterates over records from 'file_1' and 'file_2', and ds2 iterates over
-    records from files matched by 'dir_1/*'.
-
-    Example usage:
-    ```
-    tfxio = SomeTFXIO(file_patterns, file_formats)
-    ds = tfxio.RawRecordTensorFlowDataset(options=options)
-    ds = ds.flat_map(lambda x: x)
-    records = list(ds.as_numpy_iterator())
-    # iterating over `records` yields records from the each file in
-    # `file_patterns`. See `tf.data.Dataset.list_files` for more information
-    # about the order of files when expanding globs.
-    ```
-    Note that we need a flat_map, because `RawRecordTensorFlowDataset` returns
-    a dataset of datasets.
-
-    When shuffle=True, then the datasets not deterministically ordered,
-    but the contents of each nested dataset are deterministcally ordered.
-    For example, we may potentially have [ds2, ds1, ds3], where the
-    contents of ds1, ds2, and ds3 are all deterministcally ordered.
 
-    Args:
-      options: A TensorFlowDatasetOptions object. Not all options will apply.
-    """
-    raise NotImplementedError
+    def __init__(
+        self,
+        telemetry_descriptors: Optional[List[str]],
+        logical_format: str,
+        physical_format: str,
+        raw_record_beam_source: beam.PTransform,
+        raw_record_to_record_batch: beam.PTransform,
+    ):
+        super().__init__(telemetry_descriptors, logical_format, physical_format)
+        self._raw_record_beam_source = raw_record_beam_source
+        self._raw_record_to_record_batch = raw_record_to_record_batch
 
+    def _RawRecordBeamSourceInternal(self):
+        return self._raw_record_beam_source()
 
-def CreateRawRecordColumn(
-    raw_records: Union[np.ndarray, List[bytes]]) -> pa.Array:
-  """Returns an Array that satisfies the requirement of a raw record column."""
-  return pa.LargeListArray.from_arrays(
-      np.arange(0, len(raw_records) + 1, dtype=np.int64),
-      pa.array(raw_records, type=pa.large_binary()))
+    def _RawRecordToRecordBatchInternal(self, batch_size: Optional[int] = None):
+        return self._raw_record_to_record_batch(batch_size)
 
+    def _ArrowSchemaNoRawRecordColumn(self):
+        raise NotImplementedError
 
-def AppendRawRecordColumn(
-    record_batch: pa.RecordBatch,
-    column_name: Text,
-    raw_records: List[bytes],
-    record_index_column_name: Optional[Text] = None
-) -> pa.RecordBatch:
-  """Appends `raw_records` as a new column in `record_batch`.
-
-  Args:
-    record_batch: The RecordBatch to append to.
-    column_name: The name of the column to be appended.
-    raw_records: A list of bytes to be appended.
-    record_index_column_name: If not specified, len(raw_records) must equal
-      to record_batch.num_rows. Otherwise, `record_batch` must contain an
-      list_like<integer> column to indicate which element in `raw_records`
-      is the source of a row in `record_batch`. Specifically,
-      record_index_column[i] == [j] means the i-th row came from the j-th
-      element in `raw_records`. This column must not contain nulls, and all
-      its elements must be single-element lists.
-
-  Returns:
-    A new RecordBatch whose last column is the raw record column, of given name.
-  """
-  schema = record_batch.schema
-  if record_index_column_name is None:
-    assert record_batch.num_rows == len(
-        raw_records), "num_rows: {} vs len raw_records: {}".format(
-            record_batch.num_rows, len(raw_records))
-  else:
-    record_index_column_index = schema.get_field_index(
-        record_index_column_name)
-    assert record_index_column_index != -1, (
-        "Record index column {} did not exist."
-        .format(record_index_column_name))
-    record_index_column = record_batch.column(record_index_column_index)
-    assert record_index_column.null_count == 0, (
-        "Record index column must not contain nulls: {} nulls".format(
-            record_index_column.null_count))
-    column_type = record_index_column.type
-    assert ((pa.types.is_list(column_type) or
-             pa.types.is_large_list(column_type)) and
-            pa.types.is_integer(column_type.value_type)), (
-                "Record index column {} must be of type list_like<integer>, "
-                "but got: {}".format(record_index_column_name, column_type))
-    record_indices = np.asarray(record_index_column.flatten())
-    assert len(record_indices) == len(record_batch), (
-        "Record indices must be aligned with the record batch, but got "
-        "different lengths: {} vs {}".format(
-            len(record_indices), len(record_batch)))
-    raw_records = np.asarray(raw_records, dtype=object)[record_indices]
-  assert schema.get_field_index(column_name) == -1
-  raw_record_column = CreateRawRecordColumn(raw_records)
-  return pa.RecordBatch.from_arrays(
-      list(record_batch.columns) + [raw_record_column],
-      list(schema.names) + [column_name])
+    def _ProjectImpl(self, tensor_names: List[str]):
+        raise NotImplementedError
 
+    def TensorFlowDataset(self, options: dataset_options.TensorFlowDatasetOptions):
+        raise NotImplementedError
 
-@beam.ptransform_fn
-@beam.typehints.with_input_types(beam.Pipeline)
-@beam.typehints.with_output_types(bytes)
-def ReadTfRecord(pipeline: beam.Pipeline,
-                 file_pattern: List[Text]) -> beam.pvalue.PCollection:
-  """A Beam source that reads multiple TFRecord file patterns."""
-  assert file_pattern, "Must provide at least one file pattern."
-  # TODO(b/162261470): consider using beam.io.tfrecordio.ReadAllFromTFRecord
-  # once the # concern over size estimation is addressed (also see
-  # b/161935932#comment13).
-  pcolls = []
-  for i, f in enumerate(file_pattern):
-    pcolls.append(pipeline
-                  | "ReadFromTFRecord[%d]" % i >> beam.io.ReadFromTFRecord(
-                      f, coder=beam.coders.BytesCoder()))
-
-  return pcolls | "FlattenPCollsFromPatterns" >> beam.Flatten()
-
-
-class OverridableRecordBasedTFXIO(RecordBasedTFXIO):
-  """A TFXIO that takes user defined `BeamSource`.
-
-  For a `RecordBasedTFXIO`, the `BeamSource` is comprised of two PTransforms --
-  `raw_record_beam_source` and `raw_record_to_record_batch`.
-  Both are required to be overridden by the user.
-
-  raw_record_beam_source is a PTransform that produces raw records as a
-  PCollection[bytes].
-  raw_record_to_record_batch is a PTransform that converts raw records to
-  PCollection[RecordBatch] taking batch size as initialization argument.
-  """
-
-  def __init__(
-      self,
-      telemetry_descriptors: Optional[List[Text]],
-      logical_format: Text,
-      physical_format: Text,
-      raw_record_beam_source: beam.PTransform,
-      raw_record_to_record_batch: beam.PTransform,
-  ):
-    super().__init__(telemetry_descriptors, logical_format, physical_format)
-    self._raw_record_beam_source = raw_record_beam_source
-    self._raw_record_to_record_batch = raw_record_to_record_batch
-
-  def _RawRecordBeamSourceInternal(self):
-    return self._raw_record_beam_source()
-
-  def _RawRecordToRecordBatchInternal(self, batch_size: Optional[int] = None):
-    return self._raw_record_to_record_batch(batch_size)
-
-  def _ArrowSchemaNoRawRecordColumn(self):
-    raise NotImplementedError
-
-  def _ProjectImpl(self, tensor_names: List[Text]):
-    raise NotImplementedError
-
-  def TensorFlowDataset(self,
-                        options: dataset_options.TensorFlowDatasetOptions):
-    raise NotImplementedError
-
-  def TensorRepresentations(self):
-    raise NotImplementedError
+    def TensorRepresentations(self):
+        raise NotImplementedError
diff --git a/tfx_bsl/tfxio/record_based_tfxio_test.py b/tfx_bsl/tfxio/record_based_tfxio_test.py
index 3788a185..446b4a0c 100644
--- a/tfx_bsl/tfxio/record_based_tfxio_test.py
+++ b/tfx_bsl/tfxio/record_based_tfxio_test.py
@@ -14,154 +14,171 @@
 """Tests for tfx_bsl.tfxio.record_based_tfxio."""
 
 import os
-import pytest
 import tempfile
-
 from typing import Any
 
-from absl import flags
 import apache_beam as beam
-from apache_beam.testing import util as beam_test_util
 import pyarrow as pa
+import pytest
 import tensorflow as tf
-from tfx_bsl.tfxio import record_based_tfxio
-
-from absl.testing import absltest
-from absl.testing import parameterized
+from absl import flags
+from absl.testing import absltest, parameterized
+from apache_beam.testing import util as beam_test_util
 
+from tfx_bsl.tfxio import record_based_tfxio
 
 FLAGS = flags.FLAGS
 
 
 def _WriteTfRecord(path, records):
-  with tf.io.TFRecordWriter(path) as w:
-    for r in records:
-      w.write(r)
+    with tf.io.TFRecordWriter(path) as w:
+        for r in records:
+            w.write(r)
 
 
 class RecordBasedTfxioTest(parameterized.TestCase):
-
-  @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-  def testReadTfRecord(self):
-    tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
-    file1 = os.path.join(tmp_dir, "tfrecord1")
-    file1_records = [b"aa", b"bb"]
-    _WriteTfRecord(file1, file1_records)
-    file2 = os.path.join(tmp_dir, "tfrecord2")
-    file2_records = [b"cc", b"dd"]
-    _WriteTfRecord(file2, file2_records)
-
-    def _CheckRecords(actual, expected):
-      self.assertEqual(set(actual), set(expected))
-
-    # Test reading multiple file patterns.
-    with beam.Pipeline() as p:
-      record_pcoll = p | record_based_tfxio.ReadTfRecord(
-          [file1 + "*", file2 + "*"])
-      beam_test_util.assert_that(
-          record_pcoll,
-          lambda actual: _CheckRecords(actual, file1_records + file2_records))
-
-  @parameterized.named_parameters(*[
-      dict(
-          testcase_name="simple",
-          input_record_batch=pa.record_batch([pa.array([[1], [2]])],
-                                             ["feature1"]),
-          raw_records=[b"aa", b"bb"],
-          expected_raw_record_column=pa.array(
-              [[b"aa"], [b"bb"]], type=pa.large_list(pa.large_binary()))),
-      dict(
-          testcase_name="with_record_index",
-          input_record_batch=pa.record_batch(
-              [pa.array([[1], [2], [3]]),
-               pa.array([[0], [1], [1]])], ["feature1", "record_index"]),
-          raw_records=[b"aa", b"bb"],
-          expected_raw_record_column=pa.array([[b"aa"], [b"bb"], [b"bb"]],
-                                              type=pa.large_list(
-                                                  pa.large_binary())),
-          record_index_column_name="record_index",
-      ),
-      dict(
-          testcase_name="with_record_index_empty_input",
-          input_record_batch=pa.record_batch([
-              pa.array([], type=pa.list_(pa.int64())),
-              pa.array([], type=pa.large_list(pa.int32()))
-          ], ["feature1", "record_index"]),
-          raw_records=[b"aa", b"bb"],
-          expected_raw_record_column=pa.array(
-              [], type=pa.large_list(pa.large_binary())),
-          record_index_column_name="record_index",
-      )
-  ])
-  def testAppendRawRecordColumn(
-      self, input_record_batch,
-      raw_records,
-      expected_raw_record_column,
-      record_index_column_name=None):
-    column_name = "raw_record"
-    output_record_batch = record_based_tfxio.AppendRawRecordColumn(
-        record_batch=input_record_batch, column_name=column_name,
-        raw_records=raw_records,
-        record_index_column_name=record_index_column_name)
-    self.assertEqual(
-        output_record_batch.num_columns,
-        input_record_batch.num_columns + 1)
-    for i in range(input_record_batch.num_columns):
-      self.assertTrue(
-          input_record_batch.column(i).equals(output_record_batch.column(i)))
-
-    self.assertEqual(
-        output_record_batch.schema.names[output_record_batch.num_columns - 1],
-        column_name)
-    self.assertTrue(
-        output_record_batch.column(output_record_batch.num_columns - 1)
-        .equals(expected_raw_record_column))
-
-  @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-  def testOverridableRecordBasedTFXIO(self):
-    tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
-    file1 = os.path.join(tmp_dir, "tfrecord1")
-    file1_records = [b"aa", b"bb"]
-    _WriteTfRecord(file1, file1_records)
-
-    def _CheckRecords(actual, expected):
-      for a, e in zip(actual, expected):
-        self.assertDictEqual(a.to_pydict(), e)
-
-    @beam.typehints.with_input_types(Any)
-    @beam.typehints.with_output_types(bytes)
-    def _RawRecordBeamSource(pipeline: Any):
-      return pipeline | beam.io.ReadFromTFRecord(file1 + "*")
-
-    @beam.typehints.with_input_types(bytes)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _RawRecordsToRecordBatch(pcoll, batch_size):
-      batch_size = 1 if not batch_size else batch_size
-
-      class _CreateRBDoFn(beam.DoFn):
-
-        def process(self, examples):
-          return [
-              pa.RecordBatch.from_arrays([pa.array(examples)], ["column_name"])
-          ]
-
-      return (pcoll | beam.BatchElements(batch_size)
-              | beam.ParDo(_CreateRBDoFn()))
-
-    tfxio = record_based_tfxio.OverridableRecordBasedTFXIO(
-        telemetry_descriptors=None,
-        logical_format="tfrecord",
-        physical_format="tf_example",
-        raw_record_beam_source=beam.ptransform_fn(_RawRecordBeamSource),
-        raw_record_to_record_batch=beam.ptransform_fn(_RawRecordsToRecordBatch))
-
-    expected = [{"column_name": [b"aa"]}, {"column_name": [b"bb"]}]
-    with beam.Pipeline() as p:
-      record_pcoll = p | tfxio.BeamSource()
-      beam_test_util.assert_that(
-          record_pcoll,
-          lambda actual: _CheckRecords(actual, expected))
+    @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
+    def testReadTfRecord(self):
+        tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
+        file1 = os.path.join(tmp_dir, "tfrecord1")
+        file1_records = [b"aa", b"bb"]
+        _WriteTfRecord(file1, file1_records)
+        file2 = os.path.join(tmp_dir, "tfrecord2")
+        file2_records = [b"cc", b"dd"]
+        _WriteTfRecord(file2, file2_records)
+
+        def _CheckRecords(actual, expected):
+            self.assertEqual(set(actual), set(expected))
+
+        # Test reading multiple file patterns.
+        with beam.Pipeline() as p:
+            record_pcoll = p | record_based_tfxio.ReadTfRecord(
+                [file1 + "*", file2 + "*"]
+            )
+            beam_test_util.assert_that(
+                record_pcoll,
+                lambda actual: _CheckRecords(actual, file1_records + file2_records),
+            )
+
+    @parameterized.named_parameters(
+        *[
+            dict(
+                testcase_name="simple",
+                input_record_batch=pa.record_batch(
+                    [pa.array([[1], [2]])], ["feature1"]
+                ),
+                raw_records=[b"aa", b"bb"],
+                expected_raw_record_column=pa.array(
+                    [[b"aa"], [b"bb"]], type=pa.large_list(pa.large_binary())
+                ),
+            ),
+            dict(
+                testcase_name="with_record_index",
+                input_record_batch=pa.record_batch(
+                    [pa.array([[1], [2], [3]]), pa.array([[0], [1], [1]])],
+                    ["feature1", "record_index"],
+                ),
+                raw_records=[b"aa", b"bb"],
+                expected_raw_record_column=pa.array(
+                    [[b"aa"], [b"bb"], [b"bb"]], type=pa.large_list(pa.large_binary())
+                ),
+                record_index_column_name="record_index",
+            ),
+            dict(
+                testcase_name="with_record_index_empty_input",
+                input_record_batch=pa.record_batch(
+                    [
+                        pa.array([], type=pa.list_(pa.int64())),
+                        pa.array([], type=pa.large_list(pa.int32())),
+                    ],
+                    ["feature1", "record_index"],
+                ),
+                raw_records=[b"aa", b"bb"],
+                expected_raw_record_column=pa.array(
+                    [], type=pa.large_list(pa.large_binary())
+                ),
+                record_index_column_name="record_index",
+            ),
+        ]
+    )
+    def testAppendRawRecordColumn(
+        self,
+        input_record_batch,
+        raw_records,
+        expected_raw_record_column,
+        record_index_column_name=None,
+    ):
+        column_name = "raw_record"
+        output_record_batch = record_based_tfxio.AppendRawRecordColumn(
+            record_batch=input_record_batch,
+            column_name=column_name,
+            raw_records=raw_records,
+            record_index_column_name=record_index_column_name,
+        )
+        self.assertEqual(
+            output_record_batch.num_columns, input_record_batch.num_columns + 1
+        )
+        for i in range(input_record_batch.num_columns):
+            self.assertTrue(
+                input_record_batch.column(i).equals(output_record_batch.column(i))
+            )
+
+        self.assertEqual(
+            output_record_batch.schema.names[output_record_batch.num_columns - 1],
+            column_name,
+        )
+        self.assertTrue(
+            output_record_batch.column(output_record_batch.num_columns - 1).equals(
+                expected_raw_record_column
+            )
+        )
+
+    @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
+    def testOverridableRecordBasedTFXIO(self):
+        tmp_dir = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
+        file1 = os.path.join(tmp_dir, "tfrecord1")
+        file1_records = [b"aa", b"bb"]
+        _WriteTfRecord(file1, file1_records)
+
+        def _CheckRecords(actual, expected):
+            for a, e in zip(actual, expected):
+                self.assertDictEqual(a.to_pydict(), e)
+
+        @beam.typehints.with_input_types(Any)
+        @beam.typehints.with_output_types(bytes)
+        def _RawRecordBeamSource(pipeline: Any):
+            return pipeline | beam.io.ReadFromTFRecord(file1 + "*")
+
+        @beam.typehints.with_input_types(bytes)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _RawRecordsToRecordBatch(pcoll, batch_size):
+            batch_size = 1 if not batch_size else batch_size
+
+            class _CreateRBDoFn(beam.DoFn):
+                def process(self, examples):
+                    return [
+                        pa.RecordBatch.from_arrays(
+                            [pa.array(examples)], ["column_name"]
+                        )
+                    ]
+
+            return pcoll | beam.BatchElements(batch_size) | beam.ParDo(_CreateRBDoFn())
+
+        tfxio = record_based_tfxio.OverridableRecordBasedTFXIO(
+            telemetry_descriptors=None,
+            logical_format="tfrecord",
+            physical_format="tf_example",
+            raw_record_beam_source=beam.ptransform_fn(_RawRecordBeamSource),
+            raw_record_to_record_batch=beam.ptransform_fn(_RawRecordsToRecordBatch),
+        )
+
+        expected = [{"column_name": [b"aa"]}, {"column_name": [b"bb"]}]
+        with beam.Pipeline() as p:
+            record_pcoll = p | tfxio.BeamSource()
+            beam_test_util.assert_that(
+                record_pcoll, lambda actual: _CheckRecords(actual, expected)
+            )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tfxio/record_to_tensor_tfxio.py b/tfx_bsl/tfxio/record_to_tensor_tfxio.py
index da82faec..aecc86a5 100644
--- a/tfx_bsl/tfxio/record_to_tensor_tfxio.py
+++ b/tfx_bsl/tfxio/record_to_tensor_tfxio.py
@@ -15,363 +15,407 @@
 
 import copy
 import datetime
-from typing import Any, Callable, Dict, List, Iterator, Optional, Text, Union
+from typing import Any, Callable, Dict, Iterator, List, Optional, Text, Union
 
 import apache_beam as beam
-from apache_beam.utils import shared
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.coders import batch_util
-from tfx_bsl.coders import tf_graph_record_decoder
+from apache_beam.utils import shared
+
+from tfx_bsl.coders import batch_util, tf_graph_record_decoder
 from tfx_bsl.telemetry import util as telemetry_util
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import dataset_util
-from tfx_bsl.tfxio import record_based_tfxio
-from tfx_bsl.tfxio import tensor_adapter
-from tfx_bsl.tfxio import tensor_to_arrow
-from tfx_bsl.tfxio import tfxio
+from tfx_bsl.tfxio import (
+    dataset_options,
+    dataset_util,
+    record_based_tfxio,
+    tensor_adapter,
+    tensor_to_arrow,
+    tfxio,
+)
 
 
 class RecordToTensorTFXIO(record_based_tfxio.RecordBasedTFXIO):
-  """Base class for TFXIO implementations that uses TFGraphRecordDecoder."""
-
-  def __init__(self,
-               saved_decoder_path: Text,
-               telemetry_descriptors: List[Text],
-               physical_format: Text,
-               use_singleton_decoder: bool,
-               raw_record_column_name: Optional[Text]):
-    super().__init__(
-        telemetry_descriptors,
-        logical_format="tensor",
-        physical_format=physical_format,
-        raw_record_column_name=raw_record_column_name)
-    self._saved_decoder_path = saved_decoder_path
-    decoder = tf_graph_record_decoder.load_decoder(saved_decoder_path)
-    tensor_to_arrow_converter = tensor_to_arrow.TensorsToRecordBatchConverter(
-        decoder.output_type_specs())
-
-    self._arrow_schema_no_raw_record_column = (
-        tensor_to_arrow_converter.arrow_schema())
-    self._tensor_representations = (
-        tensor_to_arrow_converter.tensor_representations())
-    self._use_singleton_decoder = use_singleton_decoder
-
-    self._record_index_column_name = None
-    record_index_tensor_name = decoder.record_index_tensor_name
-    if record_index_tensor_name is not None:
-      record_index_tensor_rep = self._tensor_representations[
-          record_index_tensor_name]
-      if record_index_tensor_rep.HasField("ragged_tensor"):
-        assert len(record_index_tensor_rep.ragged_tensor.feature_path.step) == 1
-        self._record_index_column_name = (
-            record_index_tensor_rep.ragged_tensor.feature_path.step[0])
-      elif record_index_tensor_rep.HasField("varlen_sparse_tensor"):
-        self._record_index_column_name = (
-            record_index_tensor_rep.varlen_sparse_tensor.column_name)
-      else:
-        raise ValueError("The record index tensor must be a RaggedTensor or a "
-                         "VarLenSparseTensor, but got: {}"
-                         .format(record_index_tensor_rep))
-
-    if raw_record_column_name in self._arrow_schema_no_raw_record_column.names:
-      raise ValueError("raw record column name: {} collided with an existing "
-                       "column.".format(raw_record_column_name))
-
-  def SupportAttachingRawRecords(self) -> bool:
-    return True
-
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    return self._tensor_representations
-
-  def DecodeFunction(self) -> Callable[[tf.Tensor], Dict[Text, Any]]:
-    """Returns the decode function provided by the decoder.
-
-    Returns:
-      A TF function that takes a 1-D string tensor and returns a dict from
-      strings to (composite) tensors.
-    """
-    decoder = tf_graph_record_decoder.load_decoder(self._saved_decoder_path)
-    return decoder.decode_record
-
-  def _RawRecordToRecordBatchInternal(  # pytype: disable=signature-mismatch  # overriding-parameter-count-checks
-      self, batch_size: Optional[int]) -> beam.PTransform:
-
-    @beam.typehints.with_input_types(bytes)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
-      return (
-          raw_records_pcoll
-          | "BatchElements"
-          >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
-          | "Decode"
-          >> beam.ParDo(
-              _RecordsToRecordBatch(
-                  self._saved_decoder_path,
-                  self.telemetry_descriptors,
-                  shared.Shared() if self._use_singleton_decoder else None,
-                  self.raw_record_column_name,
-                  self._record_index_column_name,
-              )
-          )
-      )
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
-    return self._arrow_schema_no_raw_record_column
-
-  def _ProjectImpl(self, tensor_names: List[Text]) -> tfxio.TFXIO:
-    # We could do better by plumbing the information back to the decoder.
-    self_copy = copy.copy(self)
-    self_copy._tensor_representations = {  # pylint: disable=protected-access
-        k: v
-        for k, v in self._tensor_representations.items()
-        if k in set(tensor_names)
-    }
-    return self_copy
-
-  def _ApplyDecoderToDataset(
-      self, dataset: tf.data.Dataset) -> tf.data.Dataset:
-    decoder = tf_graph_record_decoder.load_decoder(self._saved_decoder_path)
-
-    def _ParseFn(record):
-      tensors_dict = decoder.decode_record(record)
-      return {
-          k: v
-          for k, v in tensors_dict.items()
-          if k in self.TensorRepresentations()
-      }
-    return dataset.map(_ParseFn)
+    """Base class for TFXIO implementations that uses TFGraphRecordDecoder."""
+
+    def __init__(
+        self,
+        saved_decoder_path: str,
+        telemetry_descriptors: List[str],
+        physical_format: str,
+        use_singleton_decoder: bool,
+        raw_record_column_name: Optional[str],
+    ):
+        super().__init__(
+            telemetry_descriptors,
+            logical_format="tensor",
+            physical_format=physical_format,
+            raw_record_column_name=raw_record_column_name,
+        )
+        self._saved_decoder_path = saved_decoder_path
+        decoder = tf_graph_record_decoder.load_decoder(saved_decoder_path)
+        tensor_to_arrow_converter = tensor_to_arrow.TensorsToRecordBatchConverter(
+            decoder.output_type_specs()
+        )
+
+        self._arrow_schema_no_raw_record_column = (
+            tensor_to_arrow_converter.arrow_schema()
+        )
+        self._tensor_representations = (
+            tensor_to_arrow_converter.tensor_representations()
+        )
+        self._use_singleton_decoder = use_singleton_decoder
+
+        self._record_index_column_name = None
+        record_index_tensor_name = decoder.record_index_tensor_name
+        if record_index_tensor_name is not None:
+            record_index_tensor_rep = self._tensor_representations[
+                record_index_tensor_name
+            ]
+            if record_index_tensor_rep.HasField("ragged_tensor"):
+                assert len(record_index_tensor_rep.ragged_tensor.feature_path.step) == 1
+                self._record_index_column_name = (
+                    record_index_tensor_rep.ragged_tensor.feature_path.step[0]
+                )
+            elif record_index_tensor_rep.HasField("varlen_sparse_tensor"):
+                self._record_index_column_name = (
+                    record_index_tensor_rep.varlen_sparse_tensor.column_name
+                )
+            else:
+                raise ValueError(
+                    "The record index tensor must be a RaggedTensor or a "
+                    f"VarLenSparseTensor, but got: {record_index_tensor_rep}"
+                )
+
+        if raw_record_column_name in self._arrow_schema_no_raw_record_column.names:
+            raise ValueError(
+                f"raw record column name: {raw_record_column_name} collided with an existing "
+                "column."
+            )
+
+    def SupportAttachingRawRecords(self) -> bool:
+        return True
+
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        return self._tensor_representations
+
+    def DecodeFunction(self) -> Callable[[tf.Tensor], Dict[str, Any]]:
+        """Returns the decode function provided by the decoder.
+
+        Returns
+        -------
+          A TF function that takes a 1-D string tensor and returns a dict from
+          strings to (composite) tensors.
+        """
+        decoder = tf_graph_record_decoder.load_decoder(self._saved_decoder_path)
+        return decoder.decode_record
+
+    def _RawRecordToRecordBatchInternal(  # pytype: disable=signature-mismatch  # overriding-parameter-count-checks
+        self, batch_size: Optional[int]
+    ) -> beam.PTransform:
+        @beam.typehints.with_input_types(bytes)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
+            return (
+                raw_records_pcoll
+                | "BatchElements"
+                >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
+                | "Decode"
+                >> beam.ParDo(
+                    _RecordsToRecordBatch(
+                        self._saved_decoder_path,
+                        self.telemetry_descriptors,
+                        shared.Shared() if self._use_singleton_decoder else None,
+                        self.raw_record_column_name,
+                        self._record_index_column_name,
+                    )
+                )
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
+        return self._arrow_schema_no_raw_record_column
+
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        # We could do better by plumbing the information back to the decoder.
+        self_copy = copy.copy(self)
+        self_copy._tensor_representations = {  # pylint: disable=protected-access
+            k: v
+            for k, v in self._tensor_representations.items()
+            if k in set(tensor_names)
+        }
+        return self_copy
+
+    def _ApplyDecoderToDataset(self, dataset: tf.data.Dataset) -> tf.data.Dataset:
+        decoder = tf_graph_record_decoder.load_decoder(self._saved_decoder_path)
+
+        def _ParseFn(record):
+            tensors_dict = decoder.decode_record(record)
+            return {
+                k: v
+                for k, v in tensors_dict.items()
+                if k in self.TensorRepresentations()
+            }
+
+        return dataset.map(_ParseFn)
 
 
 class BeamRecordToTensorTFXIO(RecordToTensorTFXIO):
-  """TFXIO implementation that decodes records in pcoll[bytes] with TF Graph."""
-
-  def __init__(self,
-               saved_decoder_path: Text,
-               telemetry_descriptors: List[Text],
-               physical_format: Text,
-               raw_record_column_name: Optional[Text],
-               experimental_use_singleton_decoder: bool = False):
-    """Initializer.
-
-    Args:
-      saved_decoder_path: The path to the saved TfGraphRecordDecoder to be
-        used for decoding the records. Note that this path must be accessible
-        by beam workers.
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-      physical_format: A string that describes the physical format of the data.
-      raw_record_column_name: If not None, the generated Arrow RecordBatches
-        will contain a column of the given name that contains serialized
-        records.
-      experimental_use_singleton_decoder: Experimental flag. May go away without
-        notice. DO NOT SET.
-    """
-    super().__init__(
-        saved_decoder_path=saved_decoder_path,
-        telemetry_descriptors=telemetry_descriptors,
-        physical_format=physical_format,
-        use_singleton_decoder=experimental_use_singleton_decoder,
-        raw_record_column_name=raw_record_column_name)
-
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return (beam.ptransform_fn(lambda x: x)()
+    """TFXIO implementation that decodes records in pcoll[bytes] with TF Graph."""
+
+    def __init__(
+        self,
+        saved_decoder_path: str,
+        telemetry_descriptors: List[str],
+        physical_format: str,
+        raw_record_column_name: Optional[str],
+        experimental_use_singleton_decoder: bool = False,
+    ):
+        """Initializer.
+
+        Args:
+        ----
+          saved_decoder_path: The path to the saved TfGraphRecordDecoder to be
+            used for decoding the records. Note that this path must be accessible
+            by beam workers.
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+          physical_format: A string that describes the physical format of the data.
+          raw_record_column_name: If not None, the generated Arrow RecordBatches
+            will contain a column of the given name that contains serialized
+            records.
+          experimental_use_singleton_decoder: Experimental flag. May go away without
+            notice. DO NOT SET.
+        """
+        super().__init__(
+            saved_decoder_path=saved_decoder_path,
+            telemetry_descriptors=telemetry_descriptors,
+            physical_format=physical_format,
+            use_singleton_decoder=experimental_use_singleton_decoder,
+            raw_record_column_name=raw_record_column_name,
+        )
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return (
+            beam.ptransform_fn(lambda x: x)()
             .with_input_types(bytes)
-            .with_output_types(bytes))
+            .with_output_types(bytes)
+        )
 
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    raise NotImplementedError
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        raise NotImplementedError
 
 
 class TFRecordToTensorTFXIO(RecordToTensorTFXIO):
-  """Uses a TfGraphRecordDecoder to decode records on TFRecord files.
-
-  This TFXIO assumes the data records are stored in TFRecord and takes a user
-  provided TF-graph-based decoder (see tfx_bsl.coders.tf_graph_record_decoder)
-  that decodes the records to TF (composite) tensors. The RecordBatches yielded
-  by this TFXIO is converted from those tensors, and it's guaranteed that the
-  TensorAdapter created by this TFXIO will be able to turn those RecordBatches
-  to tensors identical to the TF-graph-based decoder's output.
-  """
-
-  def __init__(self,
-               file_pattern: Union[List[Text], Text],
-               saved_decoder_path: Text,
-               telemetry_descriptors: List[Text],
-               raw_record_column_name: Optional[Text] = None):
-    """Initializer.
-
-    Args:
-      file_pattern: One or a list of glob patterns. If a list, must not be
-        empty.
-      saved_decoder_path: The path to the saved TfGraphRecordDecoder to be
-        used for decoding the records. Note that this path must be accessible
-        by beam workers.
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-      raw_record_column_name: If not None, the generated Arrow RecordBatches
-        will contain a column of the given name that contains serialized
-        records.
+    """Uses a TfGraphRecordDecoder to decode records on TFRecord files.
+
+    This TFXIO assumes the data records are stored in TFRecord and takes a user
+    provided TF-graph-based decoder (see tfx_bsl.coders.tf_graph_record_decoder)
+    that decodes the records to TF (composite) tensors. The RecordBatches yielded
+    by this TFXIO is converted from those tensors, and it's guaranteed that the
+    TensorAdapter created by this TFXIO will be able to turn those RecordBatches
+    to tensors identical to the TF-graph-based decoder's output.
     """
-    super().__init__(
-        saved_decoder_path=saved_decoder_path,
-        telemetry_descriptors=telemetry_descriptors,
-        use_singleton_decoder=False,
-        physical_format="tfrecords_gzip",
-        raw_record_column_name=raw_record_column_name)
-    if not isinstance(file_pattern, list):
-      file_pattern = [file_pattern]
-    assert file_pattern, "Must provide at least one file pattern."
-    self._file_pattern = file_pattern
-
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return record_based_tfxio.ReadTfRecord(self._file_pattern)
-
-  def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
-    raise NotImplementedError
-
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    """Creates a TFRecordDataset that yields Tensors.
-
-    The records are parsed by the decoder to create Tensors. This implementation
-    is based on tf.data.experimental.ops.make_tf_record_dataset().
-
-    See base class (tfxio.TFXIO) for more details.
-
-    Args:
-      options: an options object for the tf.data.Dataset. See
-        `dataset_options.TensorFlowDatasetOptions` for more details.
-        options.batch_size is the batch size of the input records, but if the
-        input record and the output batched tensors by the decoder are not
-        batch-aligned (i.e. 1 input record results in 1 "row" in the output
-        tensors), then the output may not be of the given batch size. Use
-        dataset.unbatch().batch(desired_batch_size) to force the output batch
-        size.
-
-    Returns:
-      A dataset of `dict` elements, (or a tuple of `dict` elements and label).
-      Each `dict` maps feature keys to `Tensor`, `SparseTensor`, or
-      `RaggedTensor` objects.
-
-    Raises:
-      ValueError: if label_key in the dataset option is not in the arrow schema.
+
+    def __init__(
+        self,
+        file_pattern: Union[List[str], str],
+        saved_decoder_path: str,
+        telemetry_descriptors: List[str],
+        raw_record_column_name: Optional[str] = None,
+    ):
+        """Initializer.
+
+        Args:
+        ----
+          file_pattern: One or a list of glob patterns. If a list, must not be
+            empty.
+          saved_decoder_path: The path to the saved TfGraphRecordDecoder to be
+            used for decoding the records. Note that this path must be accessible
+            by beam workers.
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+          raw_record_column_name: If not None, the generated Arrow RecordBatches
+            will contain a column of the given name that contains serialized
+            records.
+        """
+        super().__init__(
+            saved_decoder_path=saved_decoder_path,
+            telemetry_descriptors=telemetry_descriptors,
+            use_singleton_decoder=False,
+            physical_format="tfrecords_gzip",
+            raw_record_column_name=raw_record_column_name,
+        )
+        if not isinstance(file_pattern, list):
+            file_pattern = [file_pattern]
+        assert file_pattern, "Must provide at least one file pattern."
+        self._file_pattern = file_pattern
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return record_based_tfxio.ReadTfRecord(self._file_pattern)
+
+    def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
+        raise NotImplementedError
+
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        """Creates a TFRecordDataset that yields Tensors.
+
+        The records are parsed by the decoder to create Tensors. This implementation
+        is based on tf.data.experimental.ops.make_tf_record_dataset().
+
+        See base class (tfxio.TFXIO) for more details.
+
+        Args:
+        ----
+          options: an options object for the tf.data.Dataset. See
+            `dataset_options.TensorFlowDatasetOptions` for more details.
+            options.batch_size is the batch size of the input records, but if the
+            input record and the output batched tensors by the decoder are not
+            batch-aligned (i.e. 1 input record results in 1 "row" in the output
+            tensors), then the output may not be of the given batch size. Use
+            dataset.unbatch().batch(desired_batch_size) to force the output batch
+            size.
+
+        Returns:
+        -------
+          A dataset of `dict` elements, (or a tuple of `dict` elements and label).
+          Each `dict` maps feature keys to `Tensor`, `SparseTensor`, or
+          `RaggedTensor` objects.
+
+        Raises:
+        ------
+          ValueError: if label_key in the dataset option is not in the arrow schema.
+        """
+        dataset = dataset_util.make_tf_record_dataset(
+            file_pattern=self._file_pattern,
+            batch_size=options.batch_size,
+            drop_final_batch=options.drop_final_batch,
+            num_epochs=options.num_epochs,
+            shuffle=options.shuffle,
+            shuffle_buffer_size=options.shuffle_buffer_size,
+            shuffle_seed=options.shuffle_seed,
+            sloppy_ordering=options.sloppy_ordering,
+        )
+        dataset = self._ApplyDecoderToDataset(dataset)
+
+        label_key = options.label_key
+        if label_key is not None:
+            dataset = self._PopLabelFeatureFromDataset(dataset, label_key)
+
+        return dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
+
+
+class _DecodeFnWrapper:
+    """A wrapper over a saved decoder.
+
+    Thread-safe (all the fields should be considered read-only).
     """
 
-    dataset = dataset_util.make_tf_record_dataset(
-        file_pattern=self._file_pattern,
-        batch_size=options.batch_size,
-        drop_final_batch=options.drop_final_batch,
-        num_epochs=options.num_epochs,
-        shuffle=options.shuffle,
-        shuffle_buffer_size=options.shuffle_buffer_size,
-        shuffle_seed=options.shuffle_seed,
-        sloppy_ordering=options.sloppy_ordering)
-    dataset = self._ApplyDecoderToDataset(dataset)
-
-    label_key = options.label_key
-    if label_key is not None:
-      dataset = self._PopLabelFeatureFromDataset(dataset, label_key)
-
-    return dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
-
-
-class _DecodeFnWrapper(object):
-  """A wrapper over a saved decoder.
-
-  Thread-safe (all the fields should be considered read-only).
-  """
-
-  __slots__ = ["saved_decoder_path", "output_type_specs", "decode_fn",
-               # required in order to create weakrefs to a _DecodeFnWrapper.
-               "__weakref__"]
-
-  def __init__(self, saved_decoder_path: Text):
-    self.saved_decoder_path = saved_decoder_path
-    _MaybeRegisterStruct2TensorOps()
-    decoder = tf_graph_record_decoder.load_decoder(saved_decoder_path)
-    self.output_type_specs = decoder.output_type_specs()
-    # Store the concrete function to avoid tracing upon calling.
-    # TF guarantees its thread-safey.
-    self.decode_fn = decoder.decode_record
-    # Call the concrete function once to force optimization of the graph, as
-    # we want that to be attributed as fixed setup cost.
-    try:
-      _ = self.decode_fn(tf.constant([], shape=[0], dtype=tf.string))
-    except Exception:  # pylint:disable=broad-except
-      pass
+    __slots__ = [
+        "saved_decoder_path",
+        "output_type_specs",
+        "decode_fn",
+        # required in order to create weakrefs to a _DecodeFnWrapper.
+        "__weakref__",
+    ]
+
+    def __init__(self, saved_decoder_path: str):
+        self.saved_decoder_path = saved_decoder_path
+        _MaybeRegisterStruct2TensorOps()
+        decoder = tf_graph_record_decoder.load_decoder(saved_decoder_path)
+        self.output_type_specs = decoder.output_type_specs()
+        # Store the concrete function to avoid tracing upon calling.
+        # TF guarantees its thread-safey.
+        self.decode_fn = decoder.decode_record
+        # Call the concrete function once to force optimization of the graph, as
+        # we want that to be attributed as fixed setup cost.
+        try:
+            _ = self.decode_fn(tf.constant([], shape=[0], dtype=tf.string))
+        except Exception:  # pylint:disable=broad-except
+            pass
 
 
 @beam.typehints.with_input_types(List[bytes])
 @beam.typehints.with_output_types(pa.RecordBatch)
 class _RecordsToRecordBatch(beam.DoFn):
-  """DoFn to convert raw records to RecordBatches."""
-
-  def __init__(self, saved_decoder_path: Text,
-               telemetry_descriptors: List[Text],
-               shared_decode_fn_handle: Optional[shared.Shared],
-               raw_record_column_name: Optional[Text],
-               record_index_column_name: Optional[Text]):
-    super().__init__()
-    self._saved_decoder_path = saved_decoder_path
-    self._raw_record_column_name = raw_record_column_name
-    self._record_index_column_name = record_index_column_name
-    self._shared_decode_fn_handle = shared_decode_fn_handle
-
-    self._tensors_to_record_batch_converter = None
-    self._decode_fn = None
-    self._decoder_load_seconds_distribution = beam.metrics.Metrics.distribution(
-        telemetry_util.MakeTfxNamespace(telemetry_descriptors),
-        "record_to_tensor_tfxio_decoder_load_seconds")
-    self._decoder_load_seconds = None
-
-  def setup(self):
-    start = datetime.datetime.now()
-    if self._shared_decode_fn_handle is not None:
-      decode_fn_wrapper = self._shared_decode_fn_handle.acquire(
-          lambda: _DecodeFnWrapper(self._saved_decoder_path))
-      assert decode_fn_wrapper.saved_decoder_path == self._saved_decoder_path
-    else:
-      decode_fn_wrapper = _DecodeFnWrapper(self._saved_decoder_path)
-    self._tensors_to_record_batch_converter = (
-        tensor_to_arrow.TensorsToRecordBatchConverter(
-            decode_fn_wrapper.output_type_specs))
-    self._decode_fn = decode_fn_wrapper.decode_fn
-    self._decoder_load_seconds = int(
-        (datetime.datetime.now() - start).total_seconds())
-
-  def finish_bundle(self):
-    if self._decoder_load_seconds is not None:
-      self._decoder_load_seconds_distribution.update(self._decoder_load_seconds)
-      self._decoder_load_seconds = None
-
-  def process(self, records: List[bytes]) -> Iterator[pa.RecordBatch]:
-    assert self._tensors_to_record_batch_converter is not None  # By setup().
-    decoded = self._tensors_to_record_batch_converter.convert(
-        self._decode_fn(tf.convert_to_tensor(records, dtype=tf.string)))
-    if self._raw_record_column_name is None:
-      yield decoded
-    else:
-      yield record_based_tfxio.AppendRawRecordColumn(
-          decoded, self._raw_record_column_name, records,
-          self._record_index_column_name)
+    """DoFn to convert raw records to RecordBatches."""
+
+    def __init__(
+        self,
+        saved_decoder_path: str,
+        telemetry_descriptors: List[str],
+        shared_decode_fn_handle: Optional[shared.Shared],
+        raw_record_column_name: Optional[str],
+        record_index_column_name: Optional[str],
+    ):
+        super().__init__()
+        self._saved_decoder_path = saved_decoder_path
+        self._raw_record_column_name = raw_record_column_name
+        self._record_index_column_name = record_index_column_name
+        self._shared_decode_fn_handle = shared_decode_fn_handle
+
+        self._tensors_to_record_batch_converter = None
+        self._decode_fn = None
+        self._decoder_load_seconds_distribution = beam.metrics.Metrics.distribution(
+            telemetry_util.MakeTfxNamespace(telemetry_descriptors),
+            "record_to_tensor_tfxio_decoder_load_seconds",
+        )
+        self._decoder_load_seconds = None
+
+    def setup(self):
+        start = datetime.datetime.now()
+        if self._shared_decode_fn_handle is not None:
+            decode_fn_wrapper = self._shared_decode_fn_handle.acquire(
+                lambda: _DecodeFnWrapper(self._saved_decoder_path)
+            )
+            assert decode_fn_wrapper.saved_decoder_path == self._saved_decoder_path
+        else:
+            decode_fn_wrapper = _DecodeFnWrapper(self._saved_decoder_path)
+        self._tensors_to_record_batch_converter = (
+            tensor_to_arrow.TensorsToRecordBatchConverter(
+                decode_fn_wrapper.output_type_specs
+            )
+        )
+        self._decode_fn = decode_fn_wrapper.decode_fn
+        self._decoder_load_seconds = int(
+            (datetime.datetime.now() - start).total_seconds()
+        )
+
+    def finish_bundle(self):
+        if self._decoder_load_seconds is not None:
+            self._decoder_load_seconds_distribution.update(self._decoder_load_seconds)
+            self._decoder_load_seconds = None
+
+    def process(self, records: List[bytes]) -> Iterator[pa.RecordBatch]:
+        assert self._tensors_to_record_batch_converter is not None  # By setup().
+        decoded = self._tensors_to_record_batch_converter.convert(
+            self._decode_fn(tf.convert_to_tensor(records, dtype=tf.string))
+        )
+        if self._raw_record_column_name is None:
+            yield decoded
+        else:
+            yield record_based_tfxio.AppendRawRecordColumn(
+                decoded,
+                self._raw_record_column_name,
+                records,
+                self._record_index_column_name,
+            )
 
 
 # TODO(b/159982957): Replace this with a mechanism that registers any custom
 # op.
 def _MaybeRegisterStruct2TensorOps():
-  try:
-    import struct2tensor as _  # pylint: disable=g-import-not-at-top
-  except (ImportError, tf.errors.NotFoundError):
-    pass
+    try:
+        import struct2tensor as _  # pylint: disable=g-import-not-at-top
+    except (ImportError, tf.errors.NotFoundError):
+        pass
diff --git a/tfx_bsl/tfxio/record_to_tensor_tfxio_test.py b/tfx_bsl/tfxio/record_to_tensor_tfxio_test.py
index 2495d3a2..cd2f5c2c 100644
--- a/tfx_bsl/tfxio/record_to_tensor_tfxio_test.py
+++ b/tfx_bsl/tfxio/record_to_tensor_tfxio_test.py
@@ -14,316 +14,344 @@
 """Tests for tfx_bsl.tfxio.record_to_tensor_tfxio."""
 
 import os
-import pytest
 import tempfile
 
-from absl import flags
 import apache_beam as beam
-from apache_beam.testing import util as beam_testing_util
 import pyarrow as pa
+import pytest
 import tensorflow as tf
-from tfx_bsl.coders import tf_graph_record_decoder
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import record_to_tensor_tfxio
-from tfx_bsl.tfxio import telemetry_test_util
-
-from google.protobuf import text_format
+from absl import flags
 from absl.testing import parameterized
+from apache_beam.testing import util as beam_testing_util
+from google.protobuf import text_format
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.coders import tf_graph_record_decoder
+from tfx_bsl.tfxio import dataset_options, record_to_tensor_tfxio, telemetry_test_util
 
 FLAGS = flags.FLAGS
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class _DecoderForTesting(tf_graph_record_decoder.TFGraphRecordDecoder):
-
-  def decode_record(self, record):
-    indices = tf.transpose(
-        tf.stack([
-            tf.range(tf.size(record), dtype=tf.int64),
-            tf.zeros(tf.size(record), dtype=tf.int64)
-        ]))
-    sparse_tensor = tf.SparseTensor(
-        values=record, indices=indices, dense_shape=[tf.size(record), 1])
-    return {
-        "st1": sparse_tensor,
-        "st2": sparse_tensor
-    }
+    def decode_record(self, record):
+        indices = tf.transpose(
+            tf.stack(
+                [
+                    tf.range(tf.size(record), dtype=tf.int64),
+                    tf.zeros(tf.size(record), dtype=tf.int64),
+                ]
+            )
+        )
+        sparse_tensor = tf.SparseTensor(
+            values=record, indices=indices, dense_shape=[tf.size(record), 1]
+        )
+        return {"st1": sparse_tensor, "st2": sparse_tensor}
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class _DecoderForTestingWithRecordIndex(_DecoderForTesting):
-
-  def decode_record(self, record):
-    result = super(
-        _DecoderForTestingWithRecordIndex, self).decode_record(record)
-    result["ragged_record_index"] = tf.RaggedTensor.from_row_splits(
-        values=tf.range(tf.size(record), dtype=tf.int64),
-        row_splits=tf.range(tf.size(record) + 1, dtype=tf.int64))
-    result["sparse_record_index"] = result["ragged_record_index"].to_sparse()
-    return result
+    def decode_record(self, record):
+        result = super(_DecoderForTestingWithRecordIndex, self).decode_record(record)
+        result["ragged_record_index"] = tf.RaggedTensor.from_row_splits(
+            values=tf.range(tf.size(record), dtype=tf.int64),
+            row_splits=tf.range(tf.size(record) + 1, dtype=tf.int64),
+        )
+        result["sparse_record_index"] = result["ragged_record_index"].to_sparse()
+        return result
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-class _DecoderForTestingWithRaggedRecordIndex(
-    _DecoderForTestingWithRecordIndex):
-
-  @property
-  def record_index_tensor_name(self):
-    return "ragged_record_index"
+class _DecoderForTestingWithRaggedRecordIndex(_DecoderForTestingWithRecordIndex):
+    @property
+    def record_index_tensor_name(self):
+        return "ragged_record_index"
 
 
-class _DecoderForTestingWithSparseRecordIndex(
-    _DecoderForTestingWithRecordIndex):
-
-  @property
-  def record_index_tensor_name(self):
-    return "sparse_record_index"
+class _DecoderForTestingWithSparseRecordIndex(_DecoderForTestingWithRecordIndex):
+    @property
+    def record_index_tensor_name(self):
+        return "sparse_record_index"
 
 
 _RECORDS = [b"aaa", b"bbb"]
-_RECORDS_AS_TENSORS = [{
-    "st1":
-        tf.SparseTensor(values=[b"aaa"], indices=[[0, 0]], dense_shape=[1, 1]),
-    "st2":
-        tf.SparseTensor(values=[b"aaa"], indices=[[0, 0]], dense_shape=[1, 1])
-}, {
-    "st1":
-        tf.SparseTensor(values=[b"bbb"], indices=[[0, 0]], dense_shape=[1, 1]),
-    "st2":
-        tf.SparseTensor(values=[b"bbb"], indices=[[0, 0]], dense_shape=[1, 1])
-}]
+_RECORDS_AS_TENSORS = [
+    {
+        "st1": tf.SparseTensor(values=[b"aaa"], indices=[[0, 0]], dense_shape=[1, 1]),
+        "st2": tf.SparseTensor(values=[b"aaa"], indices=[[0, 0]], dense_shape=[1, 1]),
+    },
+    {
+        "st1": tf.SparseTensor(values=[b"bbb"], indices=[[0, 0]], dense_shape=[1, 1]),
+        "st2": tf.SparseTensor(values=[b"bbb"], indices=[[0, 0]], dense_shape=[1, 1]),
+    },
+]
 _TELEMETRY_DESCRIPTORS = ["Some", "Component"]
 
 
 def _write_input():
-  result = os.path.join(tempfile.mkdtemp(dir=FLAGS.test_tmpdir), "input")
-  with tf.io.TFRecordWriter(result) as w:
-    for r in _RECORDS:
-      w.write(r)
+    result = os.path.join(tempfile.mkdtemp(dir=FLAGS.test_tmpdir), "input")
+    with tf.io.TFRecordWriter(result) as w:
+        for r in _RECORDS:
+            w.write(r)
 
-  return result
+    return result
 
 
 def _write_decoder(decoder=_DecoderForTesting()):
-  result = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
-  tf_graph_record_decoder.save_decoder(decoder, result)
-  return result
+    result = tempfile.mkdtemp(dir=FLAGS.test_tmpdir)
+    tf_graph_record_decoder.save_decoder(decoder, result)
+    return result
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class RecordToTensorTfxioTest(tf.test.TestCase, parameterized.TestCase):
-
-  def setUp(self):
-    super().setUp()
-    self._input_path = _write_input()
-
-  def _assert_sparse_tensor_equal(self, lhs, rhs):
-    self.assertAllEqual(lhs.values, rhs.values)
-    self.assertAllEqual(lhs.indices, rhs.indices)
-    self.assertAllEqual(lhs.dense_shape, rhs.dense_shape)
-
-  # pylint: disable=unnecessary-lambda
-  # the create_decoder lambdas may seem unnecessary, but they are picklable, and
-  # the classes (not the instances of those classes) are not.
-  @parameterized.named_parameters(*[
-      dict(testcase_name="attach_raw_records",
-           attach_raw_records=True,
-           create_decoder=lambda: _DecoderForTesting()),
-      dict(testcase_name="attach_raw_records_with_ragged_record_index",
-           attach_raw_records=True,
-           create_decoder=lambda: _DecoderForTestingWithRaggedRecordIndex()),
-      dict(testcase_name="attach_raw_records_with_sparse_record_index",
-           attach_raw_records=True,
-           create_decoder=lambda: _DecoderForTestingWithSparseRecordIndex()),
-      dict(testcase_name="noattach_raw_records",
-           attach_raw_records=False,
-           create_decoder=lambda: _DecoderForTesting()),
-      dict(testcase_name="noattach_raw_records_but_with_record_index",
-           attach_raw_records=False,
-           create_decoder=lambda: _DecoderForTestingWithSparseRecordIndex()),
-      dict(testcase_name="beam_record_tfxio",
-           attach_raw_records=False,
-           create_decoder=lambda: _DecoderForTesting(),
-           beam_record_tfxio=True),
-  ])
-  # pylint: enable=unnecessary-lambda
-  def test_beam_source_and_tensor_adapter(
-      self, attach_raw_records, create_decoder, beam_record_tfxio=False):
-    decoder = create_decoder()
-    raw_record_column_name = "_raw_records" if attach_raw_records else None
-    decoder_path = _write_decoder(decoder)
-    if beam_record_tfxio:
-      tfxio = record_to_tensor_tfxio.BeamRecordToTensorTFXIO(
-          saved_decoder_path=decoder_path,
-          telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
-          physical_format="tfrecords_gzip",
-          raw_record_column_name=raw_record_column_name)
-    else:
-      tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
-          self._input_path,
-          decoder_path,
-          _TELEMETRY_DESCRIPTORS,
-          raw_record_column_name=raw_record_column_name)
-    expected_tensor_representations = {
-        "st1":
-            text_format.Parse("""varlen_sparse_tensor { column_name: "st1" }""",
-                              schema_pb2.TensorRepresentation()),
-        "st2":
-            text_format.Parse("""varlen_sparse_tensor { column_name: "st2" }""",
-                              schema_pb2.TensorRepresentation())
-    }
-    if isinstance(decoder, _DecoderForTestingWithRecordIndex):
-      expected_fields = [
-          pa.field("ragged_record_index", pa.large_list(pa.int64())),
-          pa.field("sparse_record_index", pa.large_list(pa.int64())),
-          pa.field("st1", pa.large_list(pa.large_binary())),
-          pa.field("st2", pa.large_list(pa.large_binary())),
-      ]
-      expected_tensor_representations["ragged_record_index"] = (
-          text_format.Parse(
-              """ragged_tensor {
+    def setUp(self):
+        super().setUp()
+        self._input_path = _write_input()
+
+    def _assert_sparse_tensor_equal(self, lhs, rhs):
+        self.assertAllEqual(lhs.values, rhs.values)
+        self.assertAllEqual(lhs.indices, rhs.indices)
+        self.assertAllEqual(lhs.dense_shape, rhs.dense_shape)
+
+    # pylint: disable=unnecessary-lambda
+    # the create_decoder lambdas may seem unnecessary, but they are picklable, and
+    # the classes (not the instances of those classes) are not.
+    @parameterized.named_parameters(
+        *[
+            dict(
+                testcase_name="attach_raw_records",
+                attach_raw_records=True,
+                create_decoder=lambda: _DecoderForTesting(),
+            ),
+            dict(
+                testcase_name="attach_raw_records_with_ragged_record_index",
+                attach_raw_records=True,
+                create_decoder=lambda: _DecoderForTestingWithRaggedRecordIndex(),
+            ),
+            dict(
+                testcase_name="attach_raw_records_with_sparse_record_index",
+                attach_raw_records=True,
+                create_decoder=lambda: _DecoderForTestingWithSparseRecordIndex(),
+            ),
+            dict(
+                testcase_name="noattach_raw_records",
+                attach_raw_records=False,
+                create_decoder=lambda: _DecoderForTesting(),
+            ),
+            dict(
+                testcase_name="noattach_raw_records_but_with_record_index",
+                attach_raw_records=False,
+                create_decoder=lambda: _DecoderForTestingWithSparseRecordIndex(),
+            ),
+            dict(
+                testcase_name="beam_record_tfxio",
+                attach_raw_records=False,
+                create_decoder=lambda: _DecoderForTesting(),
+                beam_record_tfxio=True,
+            ),
+        ]
+    )
+    # pylint: enable=unnecessary-lambda
+    def test_beam_source_and_tensor_adapter(
+        self, attach_raw_records, create_decoder, beam_record_tfxio=False
+    ):
+        decoder = create_decoder()
+        raw_record_column_name = "_raw_records" if attach_raw_records else None
+        decoder_path = _write_decoder(decoder)
+        if beam_record_tfxio:
+            tfxio = record_to_tensor_tfxio.BeamRecordToTensorTFXIO(
+                saved_decoder_path=decoder_path,
+                telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+                physical_format="tfrecords_gzip",
+                raw_record_column_name=raw_record_column_name,
+            )
+        else:
+            tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
+                self._input_path,
+                decoder_path,
+                _TELEMETRY_DESCRIPTORS,
+                raw_record_column_name=raw_record_column_name,
+            )
+        expected_tensor_representations = {
+            "st1": text_format.Parse(
+                """varlen_sparse_tensor { column_name: "st1" }""",
+                schema_pb2.TensorRepresentation(),
+            ),
+            "st2": text_format.Parse(
+                """varlen_sparse_tensor { column_name: "st2" }""",
+                schema_pb2.TensorRepresentation(),
+            ),
+        }
+        if isinstance(decoder, _DecoderForTestingWithRecordIndex):
+            expected_fields = [
+                pa.field("ragged_record_index", pa.large_list(pa.int64())),
+                pa.field("sparse_record_index", pa.large_list(pa.int64())),
+                pa.field("st1", pa.large_list(pa.large_binary())),
+                pa.field("st2", pa.large_list(pa.large_binary())),
+            ]
+            expected_tensor_representations["ragged_record_index"] = text_format.Parse(
+                """ragged_tensor {
                    feature_path: { step: "ragged_record_index" }
                    row_partition_dtype: INT64
-                 }""", schema_pb2.TensorRepresentation()))
-      expected_tensor_representations["sparse_record_index"] = (
-          text_format.Parse(
-              """varlen_sparse_tensor { column_name: "sparse_record_index" }""",
-              schema_pb2.TensorRepresentation()))
-    else:
-      expected_fields = [
-          pa.field("st1", pa.large_list(pa.large_binary())),
-          pa.field("st2", pa.large_list(pa.large_binary())),
-      ]
-    if attach_raw_records:
-      expected_fields.append(
-          pa.field(raw_record_column_name, pa.large_list(pa.large_binary())))
-    self.assertTrue(tfxio.ArrowSchema().equals(
-        pa.schema(expected_fields)), tfxio.ArrowSchema())
-
-    self.assertEqual(
-        tfxio.TensorRepresentations(), expected_tensor_representations)
-
-    tensor_adapter = tfxio.TensorAdapter()
-    tensor_adapter_type_specs = tensor_adapter.TypeSpecs()
-    for tensor_name, type_spec in decoder.output_type_specs().items():
-      self.assertTrue(
-          tensor_adapter_type_specs[tensor_name].is_compatible_with(type_spec))
-
-    def _assert_fn(list_of_rb):
-      self.assertLen(list_of_rb, 1)
-      rb = list_of_rb[0]
-      self.assertTrue(rb.schema.equals(tfxio.ArrowSchema()))
-      if attach_raw_records:
-        self.assertEqual(rb.column(rb.num_columns - 1).flatten().to_pylist(),
-                         _RECORDS)
-      tensors = tensor_adapter.ToBatchTensors(rb)
-      for tensor_name in ("st1", "st2"):
-        self.assertIn(tensor_name, tensors)
-        st = tensors[tensor_name]
-        self.assertAllEqual(st.values, _RECORDS)
-        self.assertAllEqual(st.indices, [[0, 0], [1, 0]])
-        self.assertAllEqual(st.dense_shape, [2, 1])
-
-    p = beam.Pipeline()
-    pipeline_input = (p | beam.Create(_RECORDS)) if beam_record_tfxio else p
-    rb_pcoll = pipeline_input | tfxio.BeamSource(batch_size=len(_RECORDS))
-    beam_testing_util.assert_that(rb_pcoll, _assert_fn)
-    pipeline_result = p.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(
-        self, pipeline_result, _TELEMETRY_DESCRIPTORS,
-        "tensor", "tfrecords_gzip")
-
-  def test_project(self):
-    decoder_path = _write_decoder()
-    tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
-        self._input_path, decoder_path, ["some", "component"])
-    projected = tfxio.Project(["st1"])
-    self.assertIn("st1", projected.TensorRepresentations())
-    self.assertNotIn("st2", projected.TensorRepresentations())
-    tensor_adapter = projected.TensorAdapter()
-
-    def _assert_fn(list_of_rb):
-      self.assertLen(list_of_rb, 1)
-      rb = list_of_rb[0]
-      tensors = tensor_adapter.ToBatchTensors(rb)
-      self.assertLen(tensors, 1)
-      self.assertIn("st1", tensors)
-      st = tensors["st1"]
-      self.assertAllEqual(st.values, _RECORDS)
-      self.assertAllEqual(st.indices, [[0, 0], [1, 0]])
-      self.assertAllEqual(st.dense_shape, [2, 1])
-
-    with beam.Pipeline() as p:
-      rb_pcoll = p | tfxio.BeamSource(batch_size=len(_RECORDS))
-      beam_testing_util.assert_that(rb_pcoll, _assert_fn)
-
-  def test_tensorflow_dataset(self):
-    decoder_path = _write_decoder()
-    tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
-        self._input_path, decoder_path, ["some", "component"])
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1)
-    for i, decoded_tensors_dict in enumerate(
-        tfxio.TensorFlowDataset(options=options)):
-      for key, tensor in decoded_tensors_dict.items():
-        self._assert_sparse_tensor_equal(tensor, _RECORDS_AS_TENSORS[i][key])
-
-  def test_projected_tensorflow_dataset(self):
-    decoder_path = _write_decoder()
-    tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
-        self._input_path, decoder_path, ["some", "component"])
-    feature_name = "st1"
-    projected_tfxio = tfxio.Project([feature_name])
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1)
-    for i, decoded_tensors_dict in enumerate(
-        projected_tfxio.TensorFlowDataset(options=options)):
-      self.assertIn(feature_name, decoded_tensors_dict)
-      self.assertLen(decoded_tensors_dict, 1)
-      tensor = decoded_tensors_dict[feature_name]
-      self._assert_sparse_tensor_equal(
-          tensor, _RECORDS_AS_TENSORS[i][feature_name])
-
-  def test_tensorflow_dataset_with_label_key(self):
-    decoder_path = _write_decoder()
-    tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
-        self._input_path, decoder_path, ["some", "component"])
-    label_key = "st1"
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1, label_key=label_key)
-    for i, (decoded_tensors_dict, label_feature) in enumerate(
-        tfxio.TensorFlowDataset(options=options)):
-      self._assert_sparse_tensor_equal(
-          label_feature, _RECORDS_AS_TENSORS[i][label_key])
-      for key, tensor in decoded_tensors_dict.items():
-        self._assert_sparse_tensor_equal(tensor, _RECORDS_AS_TENSORS[i][key])
-
-  def test_tensorflow_dataset_with_invalid_label_key(self):
-    decoder_path = _write_decoder()
-    tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
-        self._input_path, decoder_path, ["some", "component"])
-    label_key = "invalid"
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1, label_key=label_key)
-    with self.assertRaisesRegex(ValueError, "The `label_key` provided.*"):
-      tfxio.TensorFlowDataset(options=options)
-
-  def test_get_decode_function(self):
-    decoder_path = _write_decoder()
-    tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
-        self._input_path, decoder_path, ["some", "component"])
-    decode_fn = tfxio.DecodeFunction()
-    decoded = decode_fn(tf.constant(_RECORDS))
-    for tensor_name in ("st1", "st2"):
-      self.assertIn(tensor_name, decoded)
-      st = decoded[tensor_name]
-      self.assertAllEqual(st.values, _RECORDS)
-      self.assertAllEqual(st.indices, [[0, 0], [1, 0]])
-      self.assertAllEqual(st.dense_shape, [2, 1])
+                 }""",
+                schema_pb2.TensorRepresentation(),
+            )
+            expected_tensor_representations["sparse_record_index"] = text_format.Parse(
+                """varlen_sparse_tensor { column_name: "sparse_record_index" }""",
+                schema_pb2.TensorRepresentation(),
+            )
+        else:
+            expected_fields = [
+                pa.field("st1", pa.large_list(pa.large_binary())),
+                pa.field("st2", pa.large_list(pa.large_binary())),
+            ]
+        if attach_raw_records:
+            expected_fields.append(
+                pa.field(raw_record_column_name, pa.large_list(pa.large_binary()))
+            )
+        self.assertTrue(
+            tfxio.ArrowSchema().equals(pa.schema(expected_fields)), tfxio.ArrowSchema()
+        )
+
+        self.assertEqual(tfxio.TensorRepresentations(), expected_tensor_representations)
+
+        tensor_adapter = tfxio.TensorAdapter()
+        tensor_adapter_type_specs = tensor_adapter.TypeSpecs()
+        for tensor_name, type_spec in decoder.output_type_specs().items():
+            self.assertTrue(
+                tensor_adapter_type_specs[tensor_name].is_compatible_with(type_spec)
+            )
+
+        def _assert_fn(list_of_rb):
+            self.assertLen(list_of_rb, 1)
+            rb = list_of_rb[0]
+            self.assertTrue(rb.schema.equals(tfxio.ArrowSchema()))
+            if attach_raw_records:
+                self.assertEqual(
+                    rb.column(rb.num_columns - 1).flatten().to_pylist(), _RECORDS
+                )
+            tensors = tensor_adapter.ToBatchTensors(rb)
+            for tensor_name in ("st1", "st2"):
+                self.assertIn(tensor_name, tensors)
+                st = tensors[tensor_name]
+                self.assertAllEqual(st.values, _RECORDS)
+                self.assertAllEqual(st.indices, [[0, 0], [1, 0]])
+                self.assertAllEqual(st.dense_shape, [2, 1])
+
+        p = beam.Pipeline()
+        pipeline_input = (p | beam.Create(_RECORDS)) if beam_record_tfxio else p
+        rb_pcoll = pipeline_input | tfxio.BeamSource(batch_size=len(_RECORDS))
+        beam_testing_util.assert_that(rb_pcoll, _assert_fn)
+        pipeline_result = p.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self, pipeline_result, _TELEMETRY_DESCRIPTORS, "tensor", "tfrecords_gzip"
+        )
+
+    def test_project(self):
+        decoder_path = _write_decoder()
+        tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
+            self._input_path, decoder_path, ["some", "component"]
+        )
+        projected = tfxio.Project(["st1"])
+        self.assertIn("st1", projected.TensorRepresentations())
+        self.assertNotIn("st2", projected.TensorRepresentations())
+        tensor_adapter = projected.TensorAdapter()
+
+        def _assert_fn(list_of_rb):
+            self.assertLen(list_of_rb, 1)
+            rb = list_of_rb[0]
+            tensors = tensor_adapter.ToBatchTensors(rb)
+            self.assertLen(tensors, 1)
+            self.assertIn("st1", tensors)
+            st = tensors["st1"]
+            self.assertAllEqual(st.values, _RECORDS)
+            self.assertAllEqual(st.indices, [[0, 0], [1, 0]])
+            self.assertAllEqual(st.dense_shape, [2, 1])
+
+        with beam.Pipeline() as p:
+            rb_pcoll = p | tfxio.BeamSource(batch_size=len(_RECORDS))
+            beam_testing_util.assert_that(rb_pcoll, _assert_fn)
+
+    def test_tensorflow_dataset(self):
+        decoder_path = _write_decoder()
+        tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
+            self._input_path, decoder_path, ["some", "component"]
+        )
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1
+        )
+        for i, decoded_tensors_dict in enumerate(
+            tfxio.TensorFlowDataset(options=options)
+        ):
+            for key, tensor in decoded_tensors_dict.items():
+                self._assert_sparse_tensor_equal(tensor, _RECORDS_AS_TENSORS[i][key])
+
+    def test_projected_tensorflow_dataset(self):
+        decoder_path = _write_decoder()
+        tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
+            self._input_path, decoder_path, ["some", "component"]
+        )
+        feature_name = "st1"
+        projected_tfxio = tfxio.Project([feature_name])
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1
+        )
+        for i, decoded_tensors_dict in enumerate(
+            projected_tfxio.TensorFlowDataset(options=options)
+        ):
+            self.assertIn(feature_name, decoded_tensors_dict)
+            self.assertLen(decoded_tensors_dict, 1)
+            tensor = decoded_tensors_dict[feature_name]
+            self._assert_sparse_tensor_equal(
+                tensor, _RECORDS_AS_TENSORS[i][feature_name]
+            )
+
+    def test_tensorflow_dataset_with_label_key(self):
+        decoder_path = _write_decoder()
+        tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
+            self._input_path, decoder_path, ["some", "component"]
+        )
+        label_key = "st1"
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1, label_key=label_key
+        )
+        for i, (decoded_tensors_dict, label_feature) in enumerate(
+            tfxio.TensorFlowDataset(options=options)
+        ):
+            self._assert_sparse_tensor_equal(
+                label_feature, _RECORDS_AS_TENSORS[i][label_key]
+            )
+            for key, tensor in decoded_tensors_dict.items():
+                self._assert_sparse_tensor_equal(tensor, _RECORDS_AS_TENSORS[i][key])
+
+    def test_tensorflow_dataset_with_invalid_label_key(self):
+        decoder_path = _write_decoder()
+        tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
+            self._input_path, decoder_path, ["some", "component"]
+        )
+        label_key = "invalid"
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1, label_key=label_key
+        )
+        with self.assertRaisesRegex(ValueError, "The `label_key` provided.*"):
+            tfxio.TensorFlowDataset(options=options)
+
+    def test_get_decode_function(self):
+        decoder_path = _write_decoder()
+        tfxio = record_to_tensor_tfxio.TFRecordToTensorTFXIO(
+            self._input_path, decoder_path, ["some", "component"]
+        )
+        decode_fn = tfxio.DecodeFunction()
+        decoded = decode_fn(tf.constant(_RECORDS))
+        for tensor_name in ("st1", "st2"):
+            self.assertIn(tensor_name, decoded)
+            st = decoded[tensor_name]
+            self.assertAllEqual(st.values, _RECORDS)
+            self.assertAllEqual(st.indices, [[0, 0], [1, 0]])
+            self.assertAllEqual(st.dense_shape, [2, 1])
 
 
 if __name__ == "__main__":
-  tf.test.main()
+    tf.test.main()
diff --git a/tfx_bsl/tfxio/telemetry.py b/tfx_bsl/tfxio/telemetry.py
index 62a4d812..96af9e96 100644
--- a/tfx_bsl/tfxio/telemetry.py
+++ b/tfx_bsl/tfxio/telemetry.py
@@ -14,13 +14,13 @@
 """Contains PTransforms that collects telemetry from produced by TFXIO."""
 
 import enum
-from typing import Iterable, Callable, List, Optional, Text
+from typing import Callable, Iterable, List, Optional, Text
 
 import apache_beam as beam
 import numpy as np
 import pyarrow as pa
-from tfx_bsl.arrow import array_util
-from tfx_bsl.arrow import table_util
+
+from tfx_bsl.arrow import array_util, table_util
 from tfx_bsl.telemetry import util as telemetry_util
 
 
@@ -29,34 +29,43 @@
 @beam.ptransform_fn
 def ProfileRecordBatches(
     pcoll: beam.PCollection,
-    telemetry_descriptors: Optional[List[Text]],
-    logical_format: Text,
-    physical_format: Text,
-    distribution_update_probability: float = 0.1) -> beam.PCollection:
-  """An identity transform to profile RecordBatches and updated Beam metrics.
-
-  Args:
-    pcoll: a PCollection[pa.RecordBatch]
-    telemetry_descriptors: a set of descriptors that identify the component that
-      invokes this PTransform. These will be used to construct the namespace
-      to contain the beam metrics created within this PTransform. All such
-      namespaces will be prefixed by "tfxio.". If None, a default "unknown"
-      descriptor will be used.
-    logical_format: the logical format of the data (before parsed into
-      RecordBatches). Used to construct metric names.
-    physical_format: the physical format in which the data is stored on disk.
-      Used to construct metric names.
-    distribution_update_probability: probability to update the expensive,
-      per-row distributions.
-
-  Returns:
-    `pcoll` (identity function).
-  """
-  assert 0 < distribution_update_probability <= 1.0, (
-      "Invalid probability: {}".format(distribution_update_probability))
-  return pcoll | "ProfileRecordBatches" >> beam.ParDo(
-      _ProfileRecordBatchDoFn(telemetry_descriptors, logical_format,
-                              physical_format, distribution_update_probability))
+    telemetry_descriptors: Optional[List[str]],
+    logical_format: str,
+    physical_format: str,
+    distribution_update_probability: float = 0.1,
+) -> beam.PCollection:
+    """An identity transform to profile RecordBatches and updated Beam metrics.
+
+    Args:
+    ----
+      pcoll: a PCollection[pa.RecordBatch]
+      telemetry_descriptors: a set of descriptors that identify the component that
+        invokes this PTransform. These will be used to construct the namespace
+        to contain the beam metrics created within this PTransform. All such
+        namespaces will be prefixed by "tfxio.". If None, a default "unknown"
+        descriptor will be used.
+      logical_format: the logical format of the data (before parsed into
+        RecordBatches). Used to construct metric names.
+      physical_format: the physical format in which the data is stored on disk.
+        Used to construct metric names.
+      distribution_update_probability: probability to update the expensive,
+        per-row distributions.
+
+    Returns:
+    -------
+      `pcoll` (identity function).
+    """
+    assert (
+        0 < distribution_update_probability <= 1.0
+    ), f"Invalid probability: {distribution_update_probability}"
+    return pcoll | "ProfileRecordBatches" >> beam.ParDo(
+        _ProfileRecordBatchDoFn(
+            telemetry_descriptors,
+            logical_format,
+            physical_format,
+            distribution_update_probability,
+        )
+    )
 
 
 @beam.typehints.with_input_types(bytes)
@@ -64,20 +73,22 @@ def ProfileRecordBatches(
 @beam.ptransform_fn
 def ProfileRawRecords(
     pcoll: beam.PCollection,
-    telemetry_descriptors: Optional[List[Text]],
-    logical_format: Text,
-    physical_format: Text) -> beam.PCollection:
-  """An identity transform to profile raw records for record based TFXIO."""
-  return pcoll | "ProfileRawRecords" >> beam.ParDo(_ProfileRawRecordDoFn(
-      telemetry_descriptors, logical_format, physical_format))
+    telemetry_descriptors: Optional[List[str]],
+    logical_format: str,
+    physical_format: str,
+) -> beam.PCollection:
+    """An identity transform to profile raw records for record based TFXIO."""
+    return pcoll | "ProfileRawRecords" >> beam.ParDo(
+        _ProfileRawRecordDoFn(telemetry_descriptors, logical_format, physical_format)
+    )
 
 
 class _ValueType(enum.IntEnum):
-  INT = 0
-  FLOAT = 1
-  STRING = 2
-  NULL = 3  # pa.is_null()
-  OTHER = 4
+    INT = 0
+    FLOAT = 1
+    STRING = 2
+    NULL = 3  # pa.is_null()
+    OTHER = 4
 
 
 _IO_TELEMETRY_DESCRIPTOR = ["io"]
@@ -85,191 +96,215 @@ class _ValueType(enum.IntEnum):
 
 
 class _ProfileRecordBatchDoFn(beam.DoFn):
-  """A DoFn that profiles RecordBatches and updates Beam counters.
-
-  The following metrics are maintained:
-
-  num_rows: Counter. Total number of rows.
-  record_batch_byte_size: Distribution. In-memory size of the RecordBatches.
-  num_columns: Distribution. Number of present columns per row.
-      A column is present in a row if its value is not None.
-  num_feature_values: Distribution. Number of (primitive) values per cell.
-  num_feature_values[_ValueType]: Distribution. Similar to num_feature_values,
-      but sliced by _ValueType.
-  num_cells[_ValueType]: Counter. Total number of cells by _ValueType. Note that
-      it's sliced by primitive_type if a column is of type
-      list<primitive_type>. For other columns, the slice is OTHER.
-  """
-
-  def __init__(self, telemetry_descriptors: Optional[List[Text]],
-               logical_format: Text,
-               physical_format: Text, dist_update_prob: float):
-    if telemetry_descriptors is None:
-      telemetry_descriptors = _UNKNOWN_TELEMETRY_DESCRIPTORS
-    metric_namespace = telemetry_util.MakeTfxNamespace(telemetry_descriptors +
-                                                       _IO_TELEMETRY_DESCRIPTOR)
-    namer = _GetMetricNamer(logical_format, physical_format)
-    self._num_rows_dist = beam.metrics.Metrics.distribution(
-        metric_namespace, namer("num_rows")
-    )
-    self._byte_size_dist = beam.metrics.Metrics.distribution(
-        metric_namespace, namer("record_batch_byte_size"))
-    self._num_columns_dist = beam.metrics.Metrics.distribution(
-        metric_namespace, namer("num_columns"))
-    self._num_feature_values_dist = beam.metrics.Metrics.distribution(
-        metric_namespace, namer("num_feature_values"))
-    self._num_feature_values_dist_by_type = {
-        t: beam.metrics.Metrics.distribution(
-            metric_namespace, namer("num_feature_values[{}]".format(t.name)))
-        for t in _ValueType
-    }
-    self._num_cells_by_type = {
-        t: beam.metrics.Metrics.counter(metric_namespace,
-                                        namer("num_cells[{}]".format(t.name)))
-        for t in _ValueType
-    }
-    self._dist_update_prob = dist_update_prob
-
-  def _UpdateNumColumnsDist(self, record_batch: pa.RecordBatch) -> None:
-    # Define number of columns of a row to be the number of cells in that row
-    # whose values are not null.
-    # It can be computed by summing up (element wise) the negation of null
-    # flags (converted to integer) of all the arrays.
-    null_bitmaps = [
-        np.asarray(array_util.GetArrayNullBitmapAsByteArray(c)).view(bool)
-        for c in record_batch]
-    indicators = [(~bitmap).view(np.uint8) for bitmap in null_bitmaps]
-    sum_indicators = np.zeros(record_batch.num_rows, dtype=np.int64)
-    for indicator in indicators:
-      np.add(sum_indicators, indicator, out=sum_indicators)
-    for num_column in sum_indicators.tolist():
-      self._num_columns_dist.update(num_column)
-
-  def _UpdateNumValuesDist(self, record_batch: pa.RecordBatch) -> None:
-    # Updates the distribution of number of values per cell.
-    # Note that a cell could be of a deeper nested type (e.g.
-    # Struct or nested ListArray), the number of values actually means
-    # lengths of leaves.
-    # For example, given the following row:
-    # col1               |    col2
-    # [[[1, 2], [3]]]    |    [{'a': [1, 2]}, {'b': [3]}]]
-    # the number of values for col1 is 3
-    # the number of values for col2 will be updated twice because there are
-    # two leaves (col2.a, col2.b), with values 2, 1 respectively.
-
-    # Algorithm: create a mapping `m` (int->int) for array `a` so that if
-    # m[i] == j, then a[i] belongs to row j in the record batch.
-    # Then, np.bincount(m, minlength=record_batch.num_rows)[i] is how many
-    # values in `a` belong to row i. As we flatten the array, the mapping
-    # needs to be maintained so that it maps a flattened value to a row.
-    num_rows = record_batch.num_rows
-
-    def _RecursionHelper(row_indices, array):
-      """Flattens `array` while maintains the `row_indices`."""
-      array_type = array.type
-      if _IsListLike(array_type):
-        parent_indices = np.asarray(
-            array_util.GetFlattenedArrayParentIndices(array))
-        _RecursionHelper(row_indices[parent_indices], array.flatten())
-      elif pa.types.is_struct(array_type):
-        for child in array.flatten():
-          _RecursionHelper(row_indices, child)
-      else:
-        value_type = _GetValueType(array.type)
-        dist_by_type = self._num_feature_values_dist_by_type[value_type]
-        for num_values in np.bincount(row_indices, minlength=num_rows).tolist():
-          dist_by_type.update(num_values)
-          self._num_feature_values_dist.update(num_values)
-
-    for column in record_batch:
-      _RecursionHelper(np.arange(num_rows, dtype=np.int64), column)
-
-  def _UpdateNumCellsCounters(self, record_batch: pa.RecordBatch) -> None:
-    num_rows = record_batch.num_rows
-    for column in record_batch:
-      column_type = column.type
-      if pa.types.is_null(column_type):
-        self._num_cells_by_type[_ValueType.NULL].inc(num_rows)
-        continue
-
-      if _IsListLike(column_type):
-        value_type = _GetValueType(column_type.value_type)
-      else:
-        value_type = _ValueType.OTHER
-      self._num_cells_by_type[value_type].inc(num_rows - column.null_count)
-
-  def process(self, record_batch: pa.RecordBatch) -> Iterable[pa.RecordBatch]:
-    self._num_rows_dist.update(record_batch.num_rows)
-    self._UpdateNumCellsCounters(record_batch)
-    total_byte_size = table_util.TotalByteSize(
-        record_batch, ignore_unsupported=True)
-    self._byte_size_dist.update(total_byte_size)
-    # These distributions are per-row therefore expensive to update because
-    # dist.update() needs to be called num_rows * k times.
-    if np.random.rand() < self._dist_update_prob:
-      self._UpdateNumColumnsDist(record_batch)
-      self._UpdateNumValuesDist(record_batch)
-    yield record_batch
+    """A DoFn that profiles RecordBatches and updates Beam counters.
+
+    The following metrics are maintained:
+
+    num_rows: Counter. Total number of rows.
+    record_batch_byte_size: Distribution. In-memory size of the RecordBatches.
+    num_columns: Distribution. Number of present columns per row.
+        A column is present in a row if its value is not None.
+    num_feature_values: Distribution. Number of (primitive) values per cell.
+    num_feature_values[_ValueType]: Distribution. Similar to num_feature_values,
+        but sliced by _ValueType.
+    num_cells[_ValueType]: Counter. Total number of cells by _ValueType. Note that
+        it's sliced by primitive_type if a column is of type
+        list<primitive_type>. For other columns, the slice is OTHER.
+    """
+
+    def __init__(
+        self,
+        telemetry_descriptors: Optional[List[str]],
+        logical_format: str,
+        physical_format: str,
+        dist_update_prob: float,
+    ):
+        if telemetry_descriptors is None:
+            telemetry_descriptors = _UNKNOWN_TELEMETRY_DESCRIPTORS
+        metric_namespace = telemetry_util.MakeTfxNamespace(
+            telemetry_descriptors + _IO_TELEMETRY_DESCRIPTOR
+        )
+        namer = _GetMetricNamer(logical_format, physical_format)
+        self._num_rows_dist = beam.metrics.Metrics.distribution(
+            metric_namespace, namer("num_rows")
+        )
+        self._byte_size_dist = beam.metrics.Metrics.distribution(
+            metric_namespace, namer("record_batch_byte_size")
+        )
+        self._num_columns_dist = beam.metrics.Metrics.distribution(
+            metric_namespace, namer("num_columns")
+        )
+        self._num_feature_values_dist = beam.metrics.Metrics.distribution(
+            metric_namespace, namer("num_feature_values")
+        )
+        self._num_feature_values_dist_by_type = {
+            t: beam.metrics.Metrics.distribution(
+                metric_namespace, namer(f"num_feature_values[{t.name}]")
+            )
+            for t in _ValueType
+        }
+        self._num_cells_by_type = {
+            t: beam.metrics.Metrics.counter(
+                metric_namespace, namer(f"num_cells[{t.name}]")
+            )
+            for t in _ValueType
+        }
+        self._dist_update_prob = dist_update_prob
+
+    def _UpdateNumColumnsDist(self, record_batch: pa.RecordBatch) -> None:
+        # Define number of columns of a row to be the number of cells in that row
+        # whose values are not null.
+        # It can be computed by summing up (element wise) the negation of null
+        # flags (converted to integer) of all the arrays.
+        null_bitmaps = [
+            np.asarray(array_util.GetArrayNullBitmapAsByteArray(c)).view(bool)
+            for c in record_batch
+        ]
+        indicators = [(~bitmap).view(np.uint8) for bitmap in null_bitmaps]
+        sum_indicators = np.zeros(record_batch.num_rows, dtype=np.int64)
+        for indicator in indicators:
+            np.add(sum_indicators, indicator, out=sum_indicators)
+        for num_column in sum_indicators.tolist():
+            self._num_columns_dist.update(num_column)
+
+    def _UpdateNumValuesDist(self, record_batch: pa.RecordBatch) -> None:
+        # Updates the distribution of number of values per cell.
+        # Note that a cell could be of a deeper nested type (e.g.
+        # Struct or nested ListArray), the number of values actually means
+        # lengths of leaves.
+        # For example, given the following row:
+        # col1               |    col2
+        # [[[1, 2], [3]]]    |    [{'a': [1, 2]}, {'b': [3]}]]
+        # the number of values for col1 is 3
+        # the number of values for col2 will be updated twice because there are
+        # two leaves (col2.a, col2.b), with values 2, 1 respectively.
+
+        # Algorithm: create a mapping `m` (int->int) for array `a` so that if
+        # m[i] == j, then a[i] belongs to row j in the record batch.
+        # Then, np.bincount(m, minlength=record_batch.num_rows)[i] is how many
+        # values in `a` belong to row i. As we flatten the array, the mapping
+        # needs to be maintained so that it maps a flattened value to a row.
+        num_rows = record_batch.num_rows
+
+        def _RecursionHelper(row_indices, array):
+            """Flattens `array` while maintains the `row_indices`."""
+            array_type = array.type
+            if _IsListLike(array_type):
+                parent_indices = np.asarray(
+                    array_util.GetFlattenedArrayParentIndices(array)
+                )
+                _RecursionHelper(row_indices[parent_indices], array.flatten())
+            elif pa.types.is_struct(array_type):
+                for child in array.flatten():
+                    _RecursionHelper(row_indices, child)
+            else:
+                value_type = _GetValueType(array.type)
+                dist_by_type = self._num_feature_values_dist_by_type[value_type]
+                for num_values in np.bincount(row_indices, minlength=num_rows).tolist():
+                    dist_by_type.update(num_values)
+                    self._num_feature_values_dist.update(num_values)
+
+        for column in record_batch:
+            _RecursionHelper(np.arange(num_rows, dtype=np.int64), column)
+
+    def _UpdateNumCellsCounters(self, record_batch: pa.RecordBatch) -> None:
+        num_rows = record_batch.num_rows
+        for column in record_batch:
+            column_type = column.type
+            if pa.types.is_null(column_type):
+                self._num_cells_by_type[_ValueType.NULL].inc(num_rows)
+                continue
+
+            if _IsListLike(column_type):
+                value_type = _GetValueType(column_type.value_type)
+            else:
+                value_type = _ValueType.OTHER
+            self._num_cells_by_type[value_type].inc(num_rows - column.null_count)
+
+    def process(self, record_batch: pa.RecordBatch) -> Iterable[pa.RecordBatch]:
+        self._num_rows_dist.update(record_batch.num_rows)
+        self._UpdateNumCellsCounters(record_batch)
+        total_byte_size = table_util.TotalByteSize(
+            record_batch, ignore_unsupported=True
+        )
+        self._byte_size_dist.update(total_byte_size)
+        # These distributions are per-row therefore expensive to update because
+        # dist.update() needs to be called num_rows * k times.
+        if np.random.rand() < self._dist_update_prob:
+            self._UpdateNumColumnsDist(record_batch)
+            self._UpdateNumValuesDist(record_batch)
+        yield record_batch
 
 
 def _IsListLike(data_type: pa.DataType) -> bool:
-  return pa.types.is_list(data_type) or pa.types.is_large_list(data_type)
+    return pa.types.is_list(data_type) or pa.types.is_large_list(data_type)
 
 
 def _GetValueType(data_type: pa.DataType) -> _ValueType:
-  """Maps a `pa.DataType` to `ValueType`."""
-  if pa.types.is_integer(data_type):
-    return _ValueType.INT
-  if pa.types.is_floating(data_type):
-    return _ValueType.FLOAT
-  if (pa.types.is_string(data_type) or
-      pa.types.is_binary(data_type) or
-      pa.types.is_large_string(data_type) or
-      pa.types.is_large_binary(data_type)):
-    return _ValueType.STRING
-  if pa.types.is_null(data_type):
-    return _ValueType.NULL
-  return _ValueType.OTHER
+    """Maps a `pa.DataType` to `ValueType`."""
+    if pa.types.is_integer(data_type):
+        return _ValueType.INT
+    if pa.types.is_floating(data_type):
+        return _ValueType.FLOAT
+    if (
+        pa.types.is_string(data_type)
+        or pa.types.is_binary(data_type)
+        or pa.types.is_large_string(data_type)
+        or pa.types.is_large_binary(data_type)
+    ):
+        return _ValueType.STRING
+    if pa.types.is_null(data_type):
+        return _ValueType.NULL
+    return _ValueType.OTHER
 
 
 class _ProfileRawRecordDoFn(beam.DoFn):
-  """A DoFn that profiles raw records and updates Beam counters.
-
-  The following metrics are maintained:
-
-  num_raw_records: Counter. Total number of rows.
-  raw_record_byte_size: Distribution. Byte size of the raw records.
-  """
-
-  def __init__(self, telemetry_descriptors: Optional[List[Text]],
-               logical_format: Text, physical_format: Text):
-    if telemetry_descriptors is None:
-      telemetry_descriptors = _UNKNOWN_TELEMETRY_DESCRIPTORS
-    metric_namespace = telemetry_util.MakeTfxNamespace(telemetry_descriptors +
-                                                       _IO_TELEMETRY_DESCRIPTOR)
-    namer = _GetMetricNamer(logical_format, physical_format)
-    self._num_rows = beam.metrics.Metrics.counter(
-        metric_namespace, namer("num_raw_records"))
-    self._byte_size_dist = beam.metrics.Metrics.distribution(
-        metric_namespace, namer("raw_record_byte_size"))
-
-  def process(self, raw_record: bytes) -> Iterable[bytes]:
-    self._num_rows.inc()
-    self._byte_size_dist.update(len(raw_record))
-    yield raw_record
-
-
-def _GetMetricNamer(
-    logical_format: Text, physical_format: Text) -> Callable[[Text], Text]:
-  """Returns a function to construct beam metric names."""
-  for f in (logical_format, physical_format):
-    assert "[" not in f, "Invalid logical / physical format string: %s" % f
-    assert "]" not in f, "Invalid logical / physical format string: %s" % f
-    assert "-" not in f, "Invalid logical / physical format string: %s" % f
-
-  def _Namer(base_name: Text) -> Text:
-    assert "-" not in base_name, "Invalid metric name: %s" % base_name
-    return "LogicalFormat[%s]-PhysicalFormat[%s]-%s" % (
-        logical_format, physical_format, base_name)
-
-  return _Namer
+    """A DoFn that profiles raw records and updates Beam counters.
+
+    The following metrics are maintained:
+
+    num_raw_records: Counter. Total number of rows.
+    raw_record_byte_size: Distribution. Byte size of the raw records.
+    """
+
+    def __init__(
+        self,
+        telemetry_descriptors: Optional[List[str]],
+        logical_format: str,
+        physical_format: str,
+    ):
+        if telemetry_descriptors is None:
+            telemetry_descriptors = _UNKNOWN_TELEMETRY_DESCRIPTORS
+        metric_namespace = telemetry_util.MakeTfxNamespace(
+            telemetry_descriptors + _IO_TELEMETRY_DESCRIPTOR
+        )
+        namer = _GetMetricNamer(logical_format, physical_format)
+        self._num_rows = beam.metrics.Metrics.counter(
+            metric_namespace, namer("num_raw_records")
+        )
+        self._byte_size_dist = beam.metrics.Metrics.distribution(
+            metric_namespace, namer("raw_record_byte_size")
+        )
+
+    def process(self, raw_record: bytes) -> Iterable[bytes]:
+        self._num_rows.inc()
+        self._byte_size_dist.update(len(raw_record))
+        yield raw_record
+
+
+def _GetMetricNamer(logical_format: str, physical_format: str) -> Callable[[str], str]:
+    """Returns a function to construct beam metric names."""
+    for f in (logical_format, physical_format):
+        assert "[" not in f, "Invalid logical / physical format string: %s" % f
+        assert "]" not in f, "Invalid logical / physical format string: %s" % f
+        assert "-" not in f, "Invalid logical / physical format string: %s" % f
+
+    def _Namer(base_name: str) -> str:
+        assert "-" not in base_name, "Invalid metric name: %s" % base_name
+        return "LogicalFormat[%s]-PhysicalFormat[%s]-%s" % (
+            logical_format,
+            physical_format,
+            base_name,
+        )
+
+    return _Namer
diff --git a/tfx_bsl/tfxio/telemetry_test.py b/tfx_bsl/tfxio/telemetry_test.py
index 5621310f..93ef0f61 100644
--- a/tfx_bsl/tfxio/telemetry_test.py
+++ b/tfx_bsl/tfxio/telemetry_test.py
@@ -17,15 +17,12 @@
 
 import apache_beam as beam
 import pyarrow as pa
-from tfx_bsl.tfxio import telemetry
-
-from absl.testing import absltest
-from absl.testing import parameterized
+from absl.testing import absltest, parameterized
 
+from tfx_bsl.tfxio import telemetry
 
 # Used for constructing expected values for a distribution metric.
-_Distribution = collections.namedtuple(
-    "_Distribution", ["min", "max", "count", "sum"])
+_Distribution = collections.namedtuple("_Distribution", ["min", "max", "count", "sum"])
 
 # Used for constructing expected values for a distribution metric. The tests
 # will not check the exact values in that distribution, instead, only check
@@ -38,37 +35,46 @@
 
 
 def _GetMetricName(name):
-  return "LogicalFormat[%s]-PhysicalFormat[%s]-%s" % (_LOGICAL_FORMAT,
-                                                      _PHYSICAL_FORMAT, name)
+    return "LogicalFormat[%s]-PhysicalFormat[%s]-%s" % (
+        _LOGICAL_FORMAT,
+        _PHYSICAL_FORMAT,
+        name,
+    )
+
 
 _PROFILE_RECORD_BATCHES_TEST_CASES = [
     dict(
         testcase_name="multi_column_mixed_type",
-        record_batch=pa.RecordBatch.from_arrays([
-            pa.array([["abc", "def"], None, ["g"], []]),
-            pa.array([[1], [2], [3], [4]]),
-            pa.array([[1.0], [], [], [0.5]]),
-            pa.array([None, None, None, None], type=pa.null()),
-        ], ["f1", "f2", "f3", "f4"]),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array([["abc", "def"], None, ["g"], []]),
+                pa.array([[1], [2], [3], [4]]),
+                pa.array([[1.0], [], [], [0.5]]),
+                pa.array([None, None, None, None], type=pa.null()),
+            ],
+            ["f1", "f2", "f3", "f4"],
+        ),
         expected_distributions={
-            _GetMetricName("num_rows"): _Distribution(
-                min=1, max=3, count=2, sum=4
-            ),
+            _GetMetricName("num_rows"): _Distribution(min=1, max=3, count=2, sum=4),
             # byte size of an arrow array may change over time. Do not test
             # for exact values.
             _GetMetricName("record_batch_byte_size"): _IGNORE_VALUES,
-            _GetMetricName("num_columns"):
-                _Distribution(min=3, max=4, count=4, sum=15),
-            _GetMetricName("num_feature_values"):
-                _Distribution(sum=13, count=16, min=0, max=2),
-            _GetMetricName("num_feature_values[INT]"):
-                _Distribution(sum=4, count=4, min=1, max=1),
-            _GetMetricName("num_feature_values[FLOAT]"):
-                _Distribution(sum=2, count=4, min=0, max=1),
-            _GetMetricName("num_feature_values[STRING]"):
-                _Distribution(sum=3, count=4, min=0, max=2),
-            _GetMetricName("num_feature_values[NULL]"):
-                _Distribution(sum=4, count=4, min=1, max=1),
+            _GetMetricName("num_columns"): _Distribution(min=3, max=4, count=4, sum=15),
+            _GetMetricName("num_feature_values"): _Distribution(
+                sum=13, count=16, min=0, max=2
+            ),
+            _GetMetricName("num_feature_values[INT]"): _Distribution(
+                sum=4, count=4, min=1, max=1
+            ),
+            _GetMetricName("num_feature_values[FLOAT]"): _Distribution(
+                sum=2, count=4, min=0, max=1
+            ),
+            _GetMetricName("num_feature_values[STRING]"): _Distribution(
+                sum=3, count=4, min=0, max=2
+            ),
+            _GetMetricName("num_feature_values[NULL]"): _Distribution(
+                sum=4, count=4, min=1, max=1
+            ),
         },
         expected_counters={
             _GetMetricName("num_cells[NULL]"): 4,
@@ -82,154 +88,175 @@ def _GetMetricName(name):
     dict(
         testcase_name="deeply_nested_list",
         record_batch=pa.RecordBatch.from_arrays(
-            [pa.array([[[[1, 2, 3], [4]], [[5]]], [[None, [1]]]])], ["f1"]),
+            [pa.array([[[[1, 2, 3], [4]], [[5]]], [[None, [1]]]])], ["f1"]
+        ),
         expected_distributions={
-            _GetMetricName("num_rows"): _Distribution(
-                min=1, max=1, count=2, sum=2
-            ),
+            _GetMetricName("num_rows"): _Distribution(min=1, max=1, count=2, sum=2),
             _GetMetricName("record_batch_byte_size"): _IGNORE_VALUES,
-            _GetMetricName("num_columns"):
-                _Distribution(min=1, max=1, count=2, sum=2),
+            _GetMetricName("num_columns"): _Distribution(min=1, max=1, count=2, sum=2),
             # First row: 5 values; second row: 1 value
-            _GetMetricName("num_feature_values"):
-                _Distribution(sum=6, count=2, min=1, max=5),
-            _GetMetricName("num_feature_values[INT]"):
-                _Distribution(sum=6, count=2, min=1, max=5),
+            _GetMetricName("num_feature_values"): _Distribution(
+                sum=6, count=2, min=1, max=5
+            ),
+            _GetMetricName("num_feature_values[INT]"): _Distribution(
+                sum=6, count=2, min=1, max=5
+            ),
         },
         expected_counters={
             _GetMetricName("num_cells[OTHER]"): 2,
-        }),
+        },
+    ),
     dict(
         testcase_name="struct",
-        record_batch=pa.RecordBatch.from_arrays([
-            pa.array([[
-                {
-                    "foo": ["a", "b"],
-                    "bar": None
-                },
-                {
-                    "foo": ["c"],
-                    "bar": [1, 2]
-                },
-            ], [{
-                "foo": None,
-                "bar": [3]
-            }]])
-        ], ["f1"]),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [
+                        [
+                            {"foo": ["a", "b"], "bar": None},
+                            {"foo": ["c"], "bar": [1, 2]},
+                        ],
+                        [{"foo": None, "bar": [3]}],
+                    ]
+                )
+            ],
+            ["f1"],
+        ),
         expected_distributions={
-            _GetMetricName("num_rows"): _Distribution(
-                min=1, max=1, count=2, sum=2
-            ),
+            _GetMetricName("num_rows"): _Distribution(min=1, max=1, count=2, sum=2),
             _GetMetricName("record_batch_byte_size"): _IGNORE_VALUES,
-            _GetMetricName("num_columns"):
-                _Distribution(min=1, max=1, count=2, sum=2),
+            _GetMetricName("num_columns"): _Distribution(min=1, max=1, count=2, sum=2),
             # min came from the second row, number of int values.
             # max came from the first row, number of string values.
-            _GetMetricName("num_feature_values"):
-                _Distribution(sum=6, count=4, min=0, max=3),
+            _GetMetricName("num_feature_values"): _Distribution(
+                sum=6, count=4, min=0, max=3
+            ),
             # First row: 2 int values; second row: 1 int value.
-            _GetMetricName("num_feature_values[INT]"):
-                _Distribution(sum=3, count=2, min=1, max=2),
+            _GetMetricName("num_feature_values[INT]"): _Distribution(
+                sum=3, count=2, min=1, max=2
+            ),
             # First row: 3 string values; second row: 0 string value.
-            _GetMetricName("num_feature_values[STRING]"):
-                _Distribution(sum=3, count=2, min=0, max=3),
+            _GetMetricName("num_feature_values[STRING]"): _Distribution(
+                sum=3, count=2, min=0, max=3
+            ),
         },
         expected_counters={
             _GetMetricName("num_cells[OTHER]"): 2,
-        }),
+        },
+    ),
 ]
 
 
 class TelemetryTest(parameterized.TestCase):
+    def _AssertDistributionEqual(self, beam_distribution_result, expected, msg=None):
+        if expected is _IGNORE_VALUES:
+            return
+        try:
+            self.assertEqual(beam_distribution_result.min, expected.min)
+            self.assertEqual(beam_distribution_result.max, expected.max)
+            self.assertEqual(beam_distribution_result.sum, expected.sum)
+            self.assertEqual(beam_distribution_result.count, expected.count)
+        except AssertionError:
+            raise AssertionError(
+                "{}Expected: {}; got: {}".format(
+                    (msg + ": ") if msg else "", expected, beam_distribution_result
+                )
+            )
+
+    @parameterized.named_parameters(*_PROFILE_RECORD_BATCHES_TEST_CASES)
+    def testProfileRecordBatches(
+        self,
+        record_batch,
+        expected_distributions,
+        expected_counters,
+        telemetry_descriptors=None,
+        expected_namespace="tfx.UNKNOWN_COMPONENT.io",
+    ):
+        p = beam.Pipeline()
+        _ = (
+            p
+            # Slice the input into two pieces to make sure profiling can handle
+            # sliced RecordBatches.
+            | "CreateTestData"
+            >> beam.Create([record_batch.slice(0, 1), record_batch.slice(1)])
+            | "Profile"
+            >> telemetry.ProfileRecordBatches(
+                telemetry_descriptors, _LOGICAL_FORMAT, _PHYSICAL_FORMAT, 1.0
+            )
+        )
+        runner = p.run()
+        runner.wait_until_finish()
+        all_metrics = runner.metrics()
+        maintained_metrics = all_metrics.query(
+            beam.metrics.metric.MetricsFilter().with_namespace(expected_namespace)
+        )
+
+        counters = maintained_metrics[beam.metrics.metric.MetricResults.COUNTERS]
+        self.assertLen(counters, len(expected_counters))
+        for counter in counters:
+            self.assertEqual(counter.result, expected_counters[counter.key.metric.name])
+
+        distributions = maintained_metrics[
+            beam.metrics.metric.MetricResults.DISTRIBUTIONS
+        ]
+        self.assertLen(distributions, len(expected_distributions))
+        for dist in distributions:
+            self.assertIn(dist.key.metric.name, expected_distributions)
+            self._AssertDistributionEqual(
+                dist.result,
+                expected_distributions[dist.key.metric.name],
+                dist.key.metric.name,
+            )
+
+    @parameterized.named_parameters(
+        dict(
+            testcase_name="no_descriptors",
+            telemetry_descriptors=None,
+            expected_namespace="tfx.UNKNOWN_COMPONENT.io",
+        ),
+        dict(
+            testcase_name="with_descriptors",
+            telemetry_descriptors=["test", "component"],
+            expected_namespace="tfx.test.component.io",
+        ),
+    )
+    def testProfileRawRecords(self, telemetry_descriptors, expected_namespace):
+        p = beam.Pipeline()
+        _ = (
+            p
+            | "CreateTestData" >> beam.Create([b"aaa", b"bbbb"])
+            | "Profile"
+            >> telemetry.ProfileRawRecords(
+                telemetry_descriptors, _LOGICAL_FORMAT, _PHYSICAL_FORMAT
+            )
+        )
+        runner = p.run()
+        runner.wait_until_finish()
+        all_metrics = runner.metrics()
+        maintained_metrics = all_metrics.query(
+            beam.metrics.metric.MetricsFilter().with_namespace(expected_namespace)
+        )
+        counters = maintained_metrics[beam.metrics.metric.MetricResults.COUNTERS]
+        self.assertLen(counters, 1)
+        num_records_counter = counters[0]
+        self.assertEqual(
+            _GetMetricName("num_raw_records"), num_records_counter.key.metric.name
+        )
+        self.assertEqual(2, num_records_counter.result)
 
-  def _AssertDistributionEqual(
-      self, beam_distribution_result, expected, msg=None):
-    if expected is _IGNORE_VALUES:
-      return
-    try:
-      self.assertEqual(beam_distribution_result.min, expected.min)
-      self.assertEqual(beam_distribution_result.max, expected.max)
-      self.assertEqual(beam_distribution_result.sum, expected.sum)
-      self.assertEqual(beam_distribution_result.count, expected.count)
-    except AssertionError:
-      raise AssertionError("{}Expected: {}; got: {}".format(
-          (msg + ": ") if msg else "", expected, beam_distribution_result))
-
-  @parameterized.named_parameters(*_PROFILE_RECORD_BATCHES_TEST_CASES)
-  def testProfileRecordBatches(self,
-                               record_batch,
-                               expected_distributions,
-                               expected_counters,
-                               telemetry_descriptors=None,
-                               expected_namespace="tfx.UNKNOWN_COMPONENT.io"):
-    p = beam.Pipeline()
-    _ = (
-        p
-        # Slice the input into two pieces to make sure profiling can handle
-        # sliced RecordBatches.
-        | "CreateTestData" >> beam.Create(
-            [record_batch.slice(0, 1),
-             record_batch.slice(1)])
-        | "Profile" >> telemetry.ProfileRecordBatches(
-            telemetry_descriptors, _LOGICAL_FORMAT, _PHYSICAL_FORMAT, 1.0))
-    runner = p.run()
-    runner.wait_until_finish()
-    all_metrics = runner.metrics()
-    maintained_metrics = all_metrics.query(
-        beam.metrics.metric.MetricsFilter().with_namespace(expected_namespace))
-
-    counters = maintained_metrics[beam.metrics.metric.MetricResults.COUNTERS]
-    self.assertLen(counters, len(expected_counters))
-    for counter in counters:
-      self.assertEqual(
-          counter.result, expected_counters[counter.key.metric.name])
-
-    distributions = maintained_metrics[
-        beam.metrics.metric.MetricResults.DISTRIBUTIONS]
-    self.assertLen(distributions, len(expected_distributions))
-    for dist in distributions:
-      self.assertIn(dist.key.metric.name, expected_distributions)
-      self._AssertDistributionEqual(
-          dist.result, expected_distributions[dist.key.metric.name],
-          dist.key.metric.name)
-
-  @parameterized.named_parameters(
-      dict(
-          testcase_name="no_descriptors",
-          telemetry_descriptors=None,
-          expected_namespace="tfx.UNKNOWN_COMPONENT.io"),
-      dict(
-          testcase_name="with_descriptors",
-          telemetry_descriptors=["test", "component"],
-          expected_namespace="tfx.test.component.io"))
-  def testProfileRawRecords(self, telemetry_descriptors, expected_namespace):
-    p = beam.Pipeline()
-    _ = (
-        p
-        | "CreateTestData" >> beam.Create([b"aaa", b"bbbb"])
-        | "Profile" >> telemetry.ProfileRawRecords(
-            telemetry_descriptors, _LOGICAL_FORMAT, _PHYSICAL_FORMAT))
-    runner = p.run()
-    runner.wait_until_finish()
-    all_metrics = runner.metrics()
-    maintained_metrics = all_metrics.query(
-        beam.metrics.metric.MetricsFilter().with_namespace(expected_namespace))
-    counters = maintained_metrics[beam.metrics.metric.MetricResults.COUNTERS]
-    self.assertLen(counters, 1)
-    num_records_counter = counters[0]
-    self.assertEqual(_GetMetricName("num_raw_records"),
-                     num_records_counter.key.metric.name)
-    self.assertEqual(2, num_records_counter.result)
-
-    distributions = maintained_metrics[
-        beam.metrics.metric.MetricResults.DISTRIBUTIONS]
-    self.assertLen(distributions, 1)
-    byte_size_distribution = distributions[0]
-    self.assertEqual(_GetMetricName("raw_record_byte_size"),
-                     byte_size_distribution.key.metric.name)
-    self._AssertDistributionEqual(byte_size_distribution.result,
-                                  _Distribution(min=3, max=4, count=2, sum=7))
+        distributions = maintained_metrics[
+            beam.metrics.metric.MetricResults.DISTRIBUTIONS
+        ]
+        self.assertLen(distributions, 1)
+        byte_size_distribution = distributions[0]
+        self.assertEqual(
+            _GetMetricName("raw_record_byte_size"),
+            byte_size_distribution.key.metric.name,
+        )
+        self._AssertDistributionEqual(
+            byte_size_distribution.result, _Distribution(min=3, max=4, count=2, sum=7)
+        )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tfxio/telemetry_test_util.py b/tfx_bsl/tfxio/telemetry_test_util.py
index dac483fb..67f31388 100644
--- a/tfx_bsl/tfxio/telemetry_test_util.py
+++ b/tfx_bsl/tfxio/telemetry_test_util.py
@@ -13,8 +13,8 @@
 # limitations under the License.
 """TFXIO telemetry test utilities."""
 
-from typing import List, Text
 import unittest
+from typing import List, Text
 
 import apache_beam as beam
 
@@ -22,20 +22,26 @@
 def ValidateMetrics(
     test: unittest.TestCase,
     pipeline_result: beam.runners.runner.PipelineResult,
-    telemetry_descriptors: List[Text],
-    logical_format: Text, physical_format: Text):
-  all_metrics = pipeline_result.metrics()
-  maintained_metrics = all_metrics.query(
-      beam.metrics.metric.MetricsFilter().with_namespace(
-          "tfx." + ".".join(telemetry_descriptors + ["io"])))
-  test.assertIsNot(maintained_metrics, None)
-  counters = maintained_metrics[beam.metrics.metric.MetricResults.COUNTERS]
-  test.assertTrue(counters)
-  distributions = maintained_metrics[
-      beam.metrics.metric.MetricResults.DISTRIBUTIONS]
-  test.assertTrue(distributions)
-  for m in counters + distributions:
-    test.assertTrue(
-        m.key.metric.name.startswith("LogicalFormat[%s]-PhysicalFormat[%s]-" %
-                                     (logical_format, physical_format)),
-        m.key.metric.name)
+    telemetry_descriptors: List[str],
+    logical_format: str,
+    physical_format: str,
+):
+    all_metrics = pipeline_result.metrics()
+    maintained_metrics = all_metrics.query(
+        beam.metrics.metric.MetricsFilter().with_namespace(
+            "tfx." + ".".join(telemetry_descriptors + ["io"])
+        )
+    )
+    test.assertIsNot(maintained_metrics, None)
+    counters = maintained_metrics[beam.metrics.metric.MetricResults.COUNTERS]
+    test.assertTrue(counters)
+    distributions = maintained_metrics[beam.metrics.metric.MetricResults.DISTRIBUTIONS]
+    test.assertTrue(distributions)
+    for m in counters + distributions:
+        test.assertTrue(
+            m.key.metric.name.startswith(
+                "LogicalFormat[%s]-PhysicalFormat[%s]-"
+                % (logical_format, physical_format)
+            ),
+            m.key.metric.name,
+        )
diff --git a/tfx_bsl/tfxio/tensor_adapter.py b/tfx_bsl/tfxio/tensor_adapter.py
index 034f9973..2e06a6c7 100644
--- a/tfx_bsl/tfxio/tensor_adapter.py
+++ b/tfx_bsl/tfxio/tensor_adapter.py
@@ -21,760 +21,844 @@
 import numpy as np
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.arrow import array_util
-from tfx_bsl.arrow import path
-
 from tensorflow_metadata.proto.v0 import schema_pb2
 
-TensorRepresentations = Dict[str, schema_pb2.TensorRepresentation]
-
+from tfx_bsl.arrow import array_util, path
 
-class TensorAdapterConfig(object):
-  """Config to a TensorAdapter.
-
-  Contains all the information needed to create a TensorAdapter.
-  """
-
-  def __init__(self,
-               arrow_schema: pa.Schema,
-               tensor_representations: TensorRepresentations,
-               original_type_specs: Optional[Dict[str, tf.TypeSpec]] = None):
-    self.arrow_schema = arrow_schema
-    self.tensor_representations = tensor_representations
-    self.original_type_specs = original_type_specs
-
-  # See b/167128119 for the reason behind custom pickle/unpickle
-  # implementations.
-  def __getstate__(self):
-    return (self.arrow_schema, {
-        k: v.SerializeToString()
-        for k, v in self.tensor_representations.items()
-    }, self.original_type_specs)
-
-  def __setstate__(self, t):
-    tensor_representations = {}
-    for k, v in t[1].items():
-      r = schema_pb2.TensorRepresentation()
-      r.ParseFromString(v)
-      tensor_representations[k] = r
-    self.__init__(t[0], tensor_representations, t[2])
-
-
-class TensorAdapter(object):
-  """A TensorAdapter converts a RecordBatch to a collection of TF Tensors.
-
-  The conversion is determined by both the Arrow schema and the
-  TensorRepresentations, which must be provided at the initialization time.
-  Each TensorRepresentation contains the information needed to translates one
-  or more columns in a RecordBatch of the given Arrow schema into a TF Tensor
-  or CompositeTensor. They are contained in a Dict whose keys are
-  the names of the tensors, which will be the keys of the Dict produced by
-  ToBatchTensors().
-
-  TypeSpecs() returns static TypeSpecs of those tensors by their names, i.e.
-  if they have a shape, then the size of the first (batch) dimension is always
-  unknown (None) because it depends on the size of the RecordBatch passed to
-  ToBatchTensors().
-
-  It is guaranteed that for any tensor_name in the given TensorRepresentations
-  self.TypeSpecs()[tensor_name].is_compatible_with(
-      self.ToBatchedTensors(...)[tensor_name])
-
-  Sliced RecordBatches and LargeListArray columns having null elements backed by
-  non-empty sub-lists are not supported and will yield undefined behaviour.
-  """
-
-  __slots__ = [
-      "_arrow_schema", "_type_handlers", "_type_specs", "_original_type_specs"
-  ]
-
-  def __init__(self, config: TensorAdapterConfig):
-
-    self._arrow_schema = config.arrow_schema
-    self._type_handlers = _BuildTypeHandlers(config.tensor_representations,
-                                             config.arrow_schema)
-    self._type_specs = {
-        tensor_name: handler.type_spec
-        for tensor_name, handler in self._type_handlers
-    }
-
-    self._original_type_specs = (
-        self._type_specs
-        if config.original_type_specs is None else config.original_type_specs)
-
-    for tensor_name, type_spec in self._type_specs.items():
-      original_type_spec = self._original_type_specs.get(tensor_name, None)
-      if original_type_spec is None or original_type_spec != type_spec:
-        raise ValueError(
-            "original_type_specs must be a superset of type_specs derived from "
-            "TensorRepresentations. But for tensor {}, got {} vs {}".format(
-                tensor_name, original_type_spec, type_spec))
-
-  def OriginalTypeSpecs(self) -> Dict[str, tf.TypeSpec]:
-    """Returns the origin's type specs.
-
-    A TFXIO 'Y' may be a result of projection of another TFXIO 'X', in which
-    case then 'X' is the origin of 'Y'. And this method returns what
-    X.TensorAdapter().TypeSpecs() would return.
-
-    May equal to `self.TypeSpecs()`.
-
-    Returns: a mapping from tensor names to `tf.TypeSpec`s.
-    """
-    return self._original_type_specs
+TensorRepresentations = Dict[str, schema_pb2.TensorRepresentation]
 
-  def TypeSpecs(self) -> Dict[str, tf.TypeSpec]:
-    """Returns the TypeSpec for each tensor."""
-    return self._type_specs
 
-  def ToBatchTensors(
-      self,
-      record_batch: pa.RecordBatch,
-      produce_eager_tensors: Optional[bool] = None) -> Dict[str, Any]:
-    """Returns a batch of tensors translated from `record_batch`.
+class TensorAdapterConfig:
+    """Config to a TensorAdapter.
 
-    Args:
-      record_batch: input RecordBatch.
-      produce_eager_tensors: controls whether the ToBatchTensors() produces
-        eager tensors or ndarrays (or Tensor value objects). If None, determine
-        that from whether TF Eager mode is enabled.
+    Contains all the information needed to create a TensorAdapter.
+    """
 
-    Raises:
-      RuntimeError: when Eager Tensors are requested but TF is not executing
-        eagerly.
-      ValueError: when Any handler failed to produce a Tensor.
+    def __init__(
+        self,
+        arrow_schema: pa.Schema,
+        tensor_representations: TensorRepresentations,
+        original_type_specs: Optional[Dict[str, tf.TypeSpec]] = None,
+    ):
+        self.arrow_schema = arrow_schema
+        self.tensor_representations = tensor_representations
+        self.original_type_specs = original_type_specs
+
+    # See b/167128119 for the reason behind custom pickle/unpickle
+    # implementations.
+    def __getstate__(self):
+        return (
+            self.arrow_schema,
+            {k: v.SerializeToString() for k, v in self.tensor_representations.items()},
+            self.original_type_specs,
+        )
+
+    def __setstate__(self, t):
+        tensor_representations = {}
+        for k, v in t[1].items():
+            r = schema_pb2.TensorRepresentation()
+            r.ParseFromString(v)
+            tensor_representations[k] = r
+        self.__init__(t[0], tensor_representations, t[2])
+
+
+class TensorAdapter:
+    """A TensorAdapter converts a RecordBatch to a collection of TF Tensors.
+
+    The conversion is determined by both the Arrow schema and the
+    TensorRepresentations, which must be provided at the initialization time.
+    Each TensorRepresentation contains the information needed to translates one
+    or more columns in a RecordBatch of the given Arrow schema into a TF Tensor
+    or CompositeTensor. They are contained in a Dict whose keys are
+    the names of the tensors, which will be the keys of the Dict produced by
+    ToBatchTensors().
+
+    TypeSpecs() returns static TypeSpecs of those tensors by their names, i.e.
+    if they have a shape, then the size of the first (batch) dimension is always
+    unknown (None) because it depends on the size of the RecordBatch passed to
+    ToBatchTensors().
+
+    It is guaranteed that for any tensor_name in the given TensorRepresentations
+    self.TypeSpecs()[tensor_name].is_compatible_with(
+        self.ToBatchedTensors(...)[tensor_name])
+
+    Sliced RecordBatches and LargeListArray columns having null elements backed by
+    non-empty sub-lists are not supported and will yield undefined behaviour.
     """
 
-    tf_executing_eagerly = tf.executing_eagerly()
-    if produce_eager_tensors and not tf_executing_eagerly:
-      raise RuntimeError(
-          "Eager Tensors were requested but eager mode was not enabled.")
-    if produce_eager_tensors is None:
-      produce_eager_tensors = tf_executing_eagerly
-
-    if not record_batch.schema.equals(self._arrow_schema):
-      raise ValueError("Expected same schema.")
-    result = {}
-    for tensor_name, handler in self._type_handlers:
-      try:
-        result[tensor_name] = handler.GetTensor(record_batch,
-                                                produce_eager_tensors)
-      except Exception as e:
-        raise ValueError(
-            "Error raised when handling tensor '{}'".format(tensor_name)) from e
+    __slots__ = [
+        "_arrow_schema",
+        "_type_handlers",
+        "_type_specs",
+        "_original_type_specs",
+    ]
 
-    return result
+    def __init__(self, config: TensorAdapterConfig):
+        self._arrow_schema = config.arrow_schema
+        self._type_handlers = _BuildTypeHandlers(
+            config.tensor_representations, config.arrow_schema
+        )
+        self._type_specs = {
+            tensor_name: handler.type_spec
+            for tensor_name, handler in self._type_handlers
+        }
+
+        self._original_type_specs = (
+            self._type_specs
+            if config.original_type_specs is None
+            else config.original_type_specs
+        )
+
+        for tensor_name, type_spec in self._type_specs.items():
+            original_type_spec = self._original_type_specs.get(tensor_name, None)
+            if original_type_spec is None or original_type_spec != type_spec:
+                raise ValueError(
+                    "original_type_specs must be a superset of type_specs derived from "
+                    f"TensorRepresentations. But for tensor {tensor_name}, got {original_type_spec} vs {type_spec}"
+                )
+
+    def OriginalTypeSpecs(self) -> Dict[str, tf.TypeSpec]:
+        """Returns the origin's type specs.
+
+        A TFXIO 'Y' may be a result of projection of another TFXIO 'X', in which
+        case then 'X' is the origin of 'Y'. And this method returns what
+        X.TensorAdapter().TypeSpecs() would return.
+
+        May equal to `self.TypeSpecs()`.
+
+        Returns: a mapping from tensor names to `tf.TypeSpec`s.
+        """
+        return self._original_type_specs
+
+    def TypeSpecs(self) -> Dict[str, tf.TypeSpec]:
+        """Returns the TypeSpec for each tensor."""
+        return self._type_specs
+
+    def ToBatchTensors(
+        self, record_batch: pa.RecordBatch, produce_eager_tensors: Optional[bool] = None
+    ) -> Dict[str, Any]:
+        """Returns a batch of tensors translated from `record_batch`.
+
+        Args:
+        ----
+          record_batch: input RecordBatch.
+          produce_eager_tensors: controls whether the ToBatchTensors() produces
+            eager tensors or ndarrays (or Tensor value objects). If None, determine
+            that from whether TF Eager mode is enabled.
+
+        Raises:
+        ------
+          RuntimeError: when Eager Tensors are requested but TF is not executing
+            eagerly.
+          ValueError: when Any handler failed to produce a Tensor.
+        """
+        tf_executing_eagerly = tf.executing_eagerly()
+        if produce_eager_tensors and not tf_executing_eagerly:
+            raise RuntimeError(
+                "Eager Tensors were requested but eager mode was not enabled."
+            )
+        if produce_eager_tensors is None:
+            produce_eager_tensors = tf_executing_eagerly
+
+        if not record_batch.schema.equals(self._arrow_schema):
+            raise ValueError("Expected same schema.")
+        result = {}
+        for tensor_name, handler in self._type_handlers:
+            try:
+                result[tensor_name] = handler.GetTensor(
+                    record_batch, produce_eager_tensors
+                )
+            except Exception as e:
+                raise ValueError(
+                    f"Error raised when handling tensor '{tensor_name}'"
+                ) from e
+
+        return result
 
 
 class _TypeHandler(abc.ABC):
-  """Base class of all type handlers.
-
-  A TypeHandler converts one or more columns in a RecordBatch to a TF Tensor
-  or CompositeTensor according to a TensorRepresentation.
-
-  All TypeHandlers are registered by TensorRepresentation types in
-  _TYPE_HANDLER_MAP.
-  """
-
-  __slots__ = []
-
-  @abc.abstractmethod
-  def __init__(self, arrow_schema: pa.Schema,
-               tensor_representation: schema_pb2.TensorRepresentation):
-    """Initializer.
-
-    It can be assumed that CanHandle(arrow_schema, tensor_representation) would
-    return true.
-
-    Args:
-      arrow_schema: the Arrow Schema that all the RecordBatches that
-        self.GetTensor() will take conform to.
-      tensor_representation: the TensorRepresentation that determines the
-        conversion.
-    """
+    """Base class of all type handlers.
 
-  @property
-  def type_spec(self) -> tf.TypeSpec:
-    """Returns the TypeSpec of the converted Tensor or CompositeTensor."""
-    raise NotImplementedError
+    A TypeHandler converts one or more columns in a RecordBatch to a TF Tensor
+    or CompositeTensor according to a TensorRepresentation.
 
-  @abc.abstractmethod
-  def GetTensor(self, record_batch: pa.RecordBatch,
-                produce_eager_tensors: bool) -> Any:
-    """Converts the RecordBatch to Tensor or CompositeTensor.
-
-    The result must be of the same (not only compatible) TypeSpec as
-    self.type_spec.
-
-    Args:
-      record_batch: a RecordBatch that is of the same Schema as what was passed
-        at initialization time.
-      produce_eager_tensors: if True, returns Eager Tensors, otherwise returns
-        ndarrays or Tensor value objects.
-
-    Returns:
-      A Tensor or a CompositeTensor. Note that their types may vary depending
-      on whether the TF eager mode is on.
+    All TypeHandlers are registered by TensorRepresentation types in
+    _TYPE_HANDLER_MAP.
     """
 
-  @staticmethod
-  @abc.abstractmethod
-  def CanHandle(arrow_schema: pa.Schema,
-                tensor_representation: schema_pb2.TensorRepresentation) -> bool:
-    """Returns true if an instance of the handler can handle the combination."""  # pytype: disable=bad-return-type
+    __slots__ = []
+
+    @abc.abstractmethod
+    def __init__(
+        self,
+        arrow_schema: pa.Schema,
+        tensor_representation: schema_pb2.TensorRepresentation,
+    ):
+        """Initializer.
+
+        It can be assumed that CanHandle(arrow_schema, tensor_representation) would
+        return true.
+
+        Args:
+        ----
+          arrow_schema: the Arrow Schema that all the RecordBatches that
+            self.GetTensor() will take conform to.
+          tensor_representation: the TensorRepresentation that determines the
+            conversion.
+        """
+
+    @property
+    def type_spec(self) -> tf.TypeSpec:
+        """Returns the TypeSpec of the converted Tensor or CompositeTensor."""
+        raise NotImplementedError
+
+    @abc.abstractmethod
+    def GetTensor(
+        self, record_batch: pa.RecordBatch, produce_eager_tensors: bool
+    ) -> Any:
+        """Converts the RecordBatch to Tensor or CompositeTensor.
+
+        The result must be of the same (not only compatible) TypeSpec as
+        self.type_spec.
+
+        Args:
+        ----
+          record_batch: a RecordBatch that is of the same Schema as what was passed
+            at initialization time.
+          produce_eager_tensors: if True, returns Eager Tensors, otherwise returns
+            ndarrays or Tensor value objects.
+
+        Returns:
+        -------
+          A Tensor or a CompositeTensor. Note that their types may vary depending
+          on whether the TF eager mode is on.
+        """
+
+    @staticmethod
+    @abc.abstractmethod
+    def CanHandle(
+        arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation
+    ) -> bool:
+        """Returns true if an instance of the handler can handle the combination."""  # pytype: disable=bad-return-type
 
 
 class _BaseDenseTensorHandler(_TypeHandler):
-  """Base class of DenseTensorHandlers."""
-
-  __slots__ = [
-      "_column_index", "_dtype", "_shape", "_unbatched_flat_len",
-      "_convert_to_binary_fn"
-  ]
-
-  def __init__(self, arrow_schema: pa.Schema,
-               tensor_representation: schema_pb2.TensorRepresentation):
-    super().__init__(arrow_schema, tensor_representation)
-    dense_rep = tensor_representation.dense_tensor
-    column_name = dense_rep.column_name
-    self._column_index = arrow_schema.get_field_index(column_name)
-    _, value_type = _GetNestDepthAndValueType(arrow_schema,
-                                              path.ColumnPath(column_name))
-    self._dtype = _ArrowTypeToTfDtype(value_type)
-    self._convert_to_binary_fn = _GetConvertToBinaryFn(value_type)
-    unbatched_shape = [
-        d.size for d in tensor_representation.dense_tensor.shape.dim
+    """Base class of DenseTensorHandlers."""
+
+    __slots__ = [
+        "_column_index",
+        "_dtype",
+        "_shape",
+        "_unbatched_flat_len",
+        "_convert_to_binary_fn",
     ]
-    self._shape = [None] + unbatched_shape
-    self._unbatched_flat_len = int(np.prod(unbatched_shape, initial=1))
-
-  @property
-  def type_spec(self) -> tf.TypeSpec:
-    # TF's type stub is not correct about TypeSpec and its sub-classes.
-    return typing.cast(tf.TypeSpec, tf.TensorSpec(self._shape, self._dtype))
-
-  def _ListArrayToTensor(
-      self, list_array: pa.Array,
-      produce_eager_tensors: bool) -> Union[np.ndarray, tf.Tensor]:
-    """Converts a ListArray to a dense tensor."""
-    values = list_array.flatten()
-    batch_size = len(list_array)
-    expected_num_elements = batch_size * self._unbatched_flat_len
-    if len(values) != expected_num_elements:
-      raise ValueError(
-          "Unable to convert a {} to a tensor of type spec {}: size mismatch. "
-          "Expected {} elements but got {}. "
-          "If your data type is tf.Example, make sure that the feature "
-          "is always present, and have the same length in all the examples. "
-          "TFX users should make sure there is no data anomaly for the feature."
-          .format(
-              type(list_array), self.type_spec, expected_num_elements,
-              len(values)))
-    actual_shape = list(self._shape)
-    actual_shape[0] = batch_size
-    if self._convert_to_binary_fn is not None:
-      values = self._convert_to_binary_fn(values)
-    values_np = np.asarray(values).reshape(actual_shape)
-    if produce_eager_tensors:
-      return tf.convert_to_tensor(values_np)
-
-    return values_np
-
-  @staticmethod
-  def BaseCanHandle(
-      arrow_schema: pa.Schema,
-      tensor_representation: schema_pb2.TensorRepresentation) -> bool:
-    depth, value_type = _GetNestDepthAndValueType(
-        arrow_schema,
-        path.ColumnPath(tensor_representation.dense_tensor.column_name))
-    # Can only handle 1-nested lists.
-    return depth == 1 and _IsSupportedArrowValueType(value_type)
+
+    def __init__(
+        self,
+        arrow_schema: pa.Schema,
+        tensor_representation: schema_pb2.TensorRepresentation,
+    ):
+        super().__init__(arrow_schema, tensor_representation)
+        dense_rep = tensor_representation.dense_tensor
+        column_name = dense_rep.column_name
+        self._column_index = arrow_schema.get_field_index(column_name)
+        _, value_type = _GetNestDepthAndValueType(
+            arrow_schema, path.ColumnPath(column_name)
+        )
+        self._dtype = _ArrowTypeToTfDtype(value_type)
+        self._convert_to_binary_fn = _GetConvertToBinaryFn(value_type)
+        unbatched_shape = [d.size for d in tensor_representation.dense_tensor.shape.dim]
+        self._shape = [None] + unbatched_shape
+        self._unbatched_flat_len = int(np.prod(unbatched_shape, initial=1))
+
+    @property
+    def type_spec(self) -> tf.TypeSpec:
+        # TF's type stub is not correct about TypeSpec and its sub-classes.
+        return typing.cast(tf.TypeSpec, tf.TensorSpec(self._shape, self._dtype))
+
+    def _ListArrayToTensor(
+        self, list_array: pa.Array, produce_eager_tensors: bool
+    ) -> Union[np.ndarray, tf.Tensor]:
+        """Converts a ListArray to a dense tensor."""
+        values = list_array.flatten()
+        batch_size = len(list_array)
+        expected_num_elements = batch_size * self._unbatched_flat_len
+        if len(values) != expected_num_elements:
+            raise ValueError(
+                f"Unable to convert a {type(list_array)} to a tensor of type spec {self.type_spec}: size mismatch. "
+                f"Expected {expected_num_elements} elements but got {len(values)}. "
+                "If your data type is tf.Example, make sure that the feature "
+                "is always present, and have the same length in all the examples. "
+                "TFX users should make sure there is no data anomaly for the feature."
+            )
+        actual_shape = list(self._shape)
+        actual_shape[0] = batch_size
+        if self._convert_to_binary_fn is not None:
+            values = self._convert_to_binary_fn(values)
+        values_np = np.asarray(values).reshape(actual_shape)
+        if produce_eager_tensors:
+            return tf.convert_to_tensor(values_np)
+
+        return values_np
+
+    @staticmethod
+    def BaseCanHandle(
+        arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation
+    ) -> bool:
+        depth, value_type = _GetNestDepthAndValueType(
+            arrow_schema,
+            path.ColumnPath(tensor_representation.dense_tensor.column_name),
+        )
+        # Can only handle 1-nested lists.
+        return depth == 1 and _IsSupportedArrowValueType(value_type)
 
 
 class _DenseTensorHandler(_BaseDenseTensorHandler):
-  """Handles conversion to dense."""
+    """Handles conversion to dense."""
 
-  __slots__ = []
+    __slots__ = []
 
-  def GetTensor(self, record_batch: pa.RecordBatch,
-                produce_eager_tensors: bool) -> Union[np.ndarray, tf.Tensor]:
-    column = record_batch.column(self._column_index)
-    return self._ListArrayToTensor(column, produce_eager_tensors)
+    def GetTensor(
+        self, record_batch: pa.RecordBatch, produce_eager_tensors: bool
+    ) -> Union[np.ndarray, tf.Tensor]:
+        column = record_batch.column(self._column_index)
+        return self._ListArrayToTensor(column, produce_eager_tensors)
 
-  @staticmethod
-  def CanHandle(arrow_schema: pa.Schema,
-                tensor_representation: schema_pb2.TensorRepresentation) -> bool:
-    return (_BaseDenseTensorHandler.BaseCanHandle(arrow_schema,
-                                                  tensor_representation) and
-            not tensor_representation.dense_tensor.HasField("default_value"))
+    @staticmethod
+    def CanHandle(
+        arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation
+    ) -> bool:
+        return _BaseDenseTensorHandler.BaseCanHandle(
+            arrow_schema, tensor_representation
+        ) and not tensor_representation.dense_tensor.HasField("default_value")
 
 
 class _DefaultFillingDenseTensorHandler(_BaseDenseTensorHandler):
-  """Handles conversion to dense with default filling."""
-
-  __slots__ = ["_default_fill"]
-
-  def __init__(self, arrow_schema: pa.Schema,
-               tensor_representation: schema_pb2.TensorRepresentation):
-    super().__init__(arrow_schema, tensor_representation)
-    _, value_type = _GetNestDepthAndValueType(
-        arrow_schema,
-        path.ColumnPath(tensor_representation.dense_tensor.column_name))
-    self._default_fill = _GetDefaultFill(
-        self._shape[1:], value_type,
-        tensor_representation.dense_tensor.default_value)
-
-  def GetTensor(self, record_batch: pa.RecordBatch,
-                produce_eager_tensors: bool) -> Union[np.ndarray, tf.Tensor]:
-    column = record_batch.column(self._column_index)
-    column = array_util.FillNullLists(column, self._default_fill)
-    return self._ListArrayToTensor(column, produce_eager_tensors)
-
-  @staticmethod
-  def CanHandle(arrow_schema: pa.Schema,
-                tensor_representation: schema_pb2.TensorRepresentation) -> bool:
-    return (_BaseDenseTensorHandler.BaseCanHandle(arrow_schema,
-                                                  tensor_representation) and
-            tensor_representation.dense_tensor.HasField("default_value"))
+    """Handles conversion to dense with default filling."""
+
+    __slots__ = ["_default_fill"]
+
+    def __init__(
+        self,
+        arrow_schema: pa.Schema,
+        tensor_representation: schema_pb2.TensorRepresentation,
+    ):
+        super().__init__(arrow_schema, tensor_representation)
+        _, value_type = _GetNestDepthAndValueType(
+            arrow_schema,
+            path.ColumnPath(tensor_representation.dense_tensor.column_name),
+        )
+        self._default_fill = _GetDefaultFill(
+            self._shape[1:],
+            value_type,
+            tensor_representation.dense_tensor.default_value,
+        )
+
+    def GetTensor(
+        self, record_batch: pa.RecordBatch, produce_eager_tensors: bool
+    ) -> Union[np.ndarray, tf.Tensor]:
+        column = record_batch.column(self._column_index)
+        column = array_util.FillNullLists(column, self._default_fill)
+        return self._ListArrayToTensor(column, produce_eager_tensors)
+
+    @staticmethod
+    def CanHandle(
+        arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation
+    ) -> bool:
+        return _BaseDenseTensorHandler.BaseCanHandle(
+            arrow_schema, tensor_representation
+        ) and tensor_representation.dense_tensor.HasField("default_value")
 
 
 class _VarLenSparseTensorHandler(_TypeHandler):
-  """Handles conversion to varlen sparse."""
-
-  __slots__ = ["_column_index", "_dtype", "_convert_to_binary_fn"]
-
-  def __init__(self, arrow_schema: pa.Schema,
-               tensor_representation: schema_pb2.TensorRepresentation):
-    super().__init__(arrow_schema, tensor_representation)
-    column_name = tensor_representation.varlen_sparse_tensor.column_name
-    self._column_index = arrow_schema.get_field_index(column_name)
-    _, value_type = _GetNestDepthAndValueType(arrow_schema,
-                                              path.ColumnPath(column_name))
-    self._dtype = _ArrowTypeToTfDtype(value_type)
-    self._convert_to_binary_fn = _GetConvertToBinaryFn(value_type)
-
-  @property
-  def type_spec(self) -> tf.TypeSpec:
-    return typing.cast(
-        tf.TypeSpec,
-        tf.SparseTensorSpec(tf.TensorShape([None, None]), self._dtype))
-
-  def GetTensor(self, record_batch: pa.RecordBatch,
-                produce_eager_tensors: bool) -> Any:
-    array = record_batch.column(self._column_index)
-    coo_array, dense_shape_array = array_util.CooFromListArray(array)
-    dense_shape_np = dense_shape_array.to_numpy()
-    values_array = array.flatten()
-    if self._convert_to_binary_fn is not None:
-      values_array = self._convert_to_binary_fn(values_array)
-    values_np = np.asarray(values_array)
-    coo_np = coo_array.to_numpy().reshape(values_np.size, 2)
-
-    if produce_eager_tensors:
-      return tf.sparse.SparseTensor(
-          indices=tf.convert_to_tensor(coo_np),
-          dense_shape=tf.convert_to_tensor(dense_shape_np),
-          values=tf.convert_to_tensor(values_np))
-    return tf.compat.v1.SparseTensorValue(
-        indices=coo_np, dense_shape=dense_shape_np, values=values_np)
-
-  @staticmethod
-  def CanHandle(arrow_schema: pa.Schema,
-                tensor_representation: schema_pb2.TensorRepresentation) -> bool:
-    depth, value_type = _GetNestDepthAndValueType(
-        arrow_schema,
-        path.ColumnPath(
-            [tensor_representation.varlen_sparse_tensor.column_name]))
-    # Currently can only handle 1-nested lists, but can easily support
-    # arbitrarily nested ListArrays.
-    return depth == 1 and _IsSupportedArrowValueType(value_type)
+    """Handles conversion to varlen sparse."""
+
+    __slots__ = ["_column_index", "_dtype", "_convert_to_binary_fn"]
+
+    def __init__(
+        self,
+        arrow_schema: pa.Schema,
+        tensor_representation: schema_pb2.TensorRepresentation,
+    ):
+        super().__init__(arrow_schema, tensor_representation)
+        column_name = tensor_representation.varlen_sparse_tensor.column_name
+        self._column_index = arrow_schema.get_field_index(column_name)
+        _, value_type = _GetNestDepthAndValueType(
+            arrow_schema, path.ColumnPath(column_name)
+        )
+        self._dtype = _ArrowTypeToTfDtype(value_type)
+        self._convert_to_binary_fn = _GetConvertToBinaryFn(value_type)
+
+    @property
+    def type_spec(self) -> tf.TypeSpec:
+        return typing.cast(
+            tf.TypeSpec, tf.SparseTensorSpec(tf.TensorShape([None, None]), self._dtype)
+        )
+
+    def GetTensor(
+        self, record_batch: pa.RecordBatch, produce_eager_tensors: bool
+    ) -> Any:
+        array = record_batch.column(self._column_index)
+        coo_array, dense_shape_array = array_util.CooFromListArray(array)
+        dense_shape_np = dense_shape_array.to_numpy()
+        values_array = array.flatten()
+        if self._convert_to_binary_fn is not None:
+            values_array = self._convert_to_binary_fn(values_array)
+        values_np = np.asarray(values_array)
+        coo_np = coo_array.to_numpy().reshape(values_np.size, 2)
+
+        if produce_eager_tensors:
+            return tf.sparse.SparseTensor(
+                indices=tf.convert_to_tensor(coo_np),
+                dense_shape=tf.convert_to_tensor(dense_shape_np),
+                values=tf.convert_to_tensor(values_np),
+            )
+        return tf.compat.v1.SparseTensorValue(
+            indices=coo_np, dense_shape=dense_shape_np, values=values_np
+        )
+
+    @staticmethod
+    def CanHandle(
+        arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation
+    ) -> bool:
+        depth, value_type = _GetNestDepthAndValueType(
+            arrow_schema,
+            path.ColumnPath([tensor_representation.varlen_sparse_tensor.column_name]),
+        )
+        # Currently can only handle 1-nested lists, but can easily support
+        # arbitrarily nested ListArrays.
+        return depth == 1 and _IsSupportedArrowValueType(value_type)
 
 
 class _SparseTensorHandler(_TypeHandler):
-  """Handles conversion to SparseTensors."""
-
-  __slots__ = [
-      "_index_column_indices", "_value_column_index", "_shape", "_dtype",
-      "_coo_size", "_convert_to_binary_fn"
-  ]
-
-  def __init__(self, arrow_schema: pa.Schema,
-               tensor_representation: schema_pb2.TensorRepresentation):
-    super().__init__(arrow_schema, tensor_representation)
-    sparse_representation = tensor_representation.sparse_tensor
-    self._index_column_indices = tuple(
-        arrow_schema.get_field_index(c)
-        for c in sparse_representation.index_column_names)
-    self._value_column_index = arrow_schema.get_field_index(
-        sparse_representation.value_column_name)
-    self._shape = [dim.size for dim in sparse_representation.dense_shape.dim]
-    _, value_type = _GetNestDepthAndValueType(
-        arrow_schema, path.ColumnPath(sparse_representation.value_column_name))
-    self._dtype = _ArrowTypeToTfDtype(value_type)
-    self._coo_size = len(self._shape) + 1
-    self._convert_to_binary_fn = _GetConvertToBinaryFn(value_type)
-
-  @property
-  def type_spec(self) -> tf.TypeSpec:
-    batched_shape = [None] + [dim if dim != -1 else None for dim in self._shape]
-    return typing.cast(
-        tf.TypeSpec,
-        tf.SparseTensorSpec(tf.TensorShape(batched_shape), self._dtype))
-
-  def GetTensor(self, record_batch: pa.RecordBatch,
-                produce_eager_tensors: bool) -> Any:
-    values_array = record_batch.column(self._value_column_index)
-    values_parent_indices = array_util.GetFlattenedArrayParentIndices(
-        values_array)
-    indices_arrays = [np.asarray(values_parent_indices)]
-    for index_column_index in self._index_column_indices:
-      indices_arrays.append(
-          np.asarray(record_batch.column(index_column_index).flatten()))
-    flat_values_array = values_array.flatten()
-    if self._convert_to_binary_fn is not None:
-      flat_values_array = self._convert_to_binary_fn(flat_values_array)
-    values_np = np.asarray(flat_values_array)
-    coo_np = np.empty(shape=(len(values_np), self._coo_size), dtype=np.int64)
-    try:
-      np.stack(indices_arrays, axis=1, out=coo_np)
-    except ValueError as e:
-      raise ValueError("Error constructing the COO for SparseTensor. "
-                       "number of values: {}; "
-                       "size of each index array: {}".format(
-                           len(values_np),
-                           [len(i) for i in indices_arrays])) from e
-
-    dense_shape = [len(record_batch)] + self._shape
-
-    if produce_eager_tensors:
-      return tf.sparse.SparseTensor(
-          indices=tf.convert_to_tensor(coo_np),
-          dense_shape=tf.convert_to_tensor(dense_shape, dtype=tf.int64),
-          values=tf.convert_to_tensor(values_np))
-    return tf.compat.v1.SparseTensorValue(
-        indices=coo_np, dense_shape=dense_shape, values=values_np)
-
-  @staticmethod
-  def CanHandle(arrow_schema: pa.Schema,
-                tensor_representation: schema_pb2.TensorRepresentation) -> bool:
-    """Returns whether `tensor_representation` can be handled."""
-    sparse_representation = tensor_representation.sparse_tensor
-    if (len(sparse_representation.dense_shape.dim) != len(
-        sparse_representation.index_column_names)):
-      return False
-
-    # All the index columns must be of integral types.
-    for index_column in sparse_representation.index_column_names:
-      depth, value_type = _GetNestDepthAndValueType(
-          arrow_schema, path.ColumnPath(index_column))
-      if depth != 1 or not pa.types.is_integer(value_type):
-        return False
-
-    depth, value_type = _GetNestDepthAndValueType(
-        arrow_schema, path.ColumnPath(sparse_representation.value_column_name))
-    return depth == 1 and _IsSupportedArrowValueType(value_type)
+    """Handles conversion to SparseTensors."""
+
+    __slots__ = [
+        "_index_column_indices",
+        "_value_column_index",
+        "_shape",
+        "_dtype",
+        "_coo_size",
+        "_convert_to_binary_fn",
+    ]
 
+    def __init__(
+        self,
+        arrow_schema: pa.Schema,
+        tensor_representation: schema_pb2.TensorRepresentation,
+    ):
+        super().__init__(arrow_schema, tensor_representation)
+        sparse_representation = tensor_representation.sparse_tensor
+        self._index_column_indices = tuple(
+            arrow_schema.get_field_index(c)
+            for c in sparse_representation.index_column_names
+        )
+        self._value_column_index = arrow_schema.get_field_index(
+            sparse_representation.value_column_name
+        )
+        self._shape = [dim.size for dim in sparse_representation.dense_shape.dim]
+        _, value_type = _GetNestDepthAndValueType(
+            arrow_schema, path.ColumnPath(sparse_representation.value_column_name)
+        )
+        self._dtype = _ArrowTypeToTfDtype(value_type)
+        self._coo_size = len(self._shape) + 1
+        self._convert_to_binary_fn = _GetConvertToBinaryFn(value_type)
+
+    @property
+    def type_spec(self) -> tf.TypeSpec:
+        batched_shape = [None] + [dim if dim != -1 else None for dim in self._shape]
+        return typing.cast(
+            tf.TypeSpec, tf.SparseTensorSpec(tf.TensorShape(batched_shape), self._dtype)
+        )
+
+    def GetTensor(
+        self, record_batch: pa.RecordBatch, produce_eager_tensors: bool
+    ) -> Any:
+        values_array = record_batch.column(self._value_column_index)
+        values_parent_indices = array_util.GetFlattenedArrayParentIndices(values_array)
+        indices_arrays = [np.asarray(values_parent_indices)]
+        for index_column_index in self._index_column_indices:
+            indices_arrays.append(
+                np.asarray(record_batch.column(index_column_index).flatten())
+            )
+        flat_values_array = values_array.flatten()
+        if self._convert_to_binary_fn is not None:
+            flat_values_array = self._convert_to_binary_fn(flat_values_array)
+        values_np = np.asarray(flat_values_array)
+        coo_np = np.empty(shape=(len(values_np), self._coo_size), dtype=np.int64)
+        try:
+            np.stack(indices_arrays, axis=1, out=coo_np)
+        except ValueError as e:
+            raise ValueError(
+                "Error constructing the COO for SparseTensor. "
+                f"number of values: {len(values_np)}; "
+                f"size of each index array: {[len(i) for i in indices_arrays]}"
+            ) from e
+
+        dense_shape = [len(record_batch)] + self._shape
+
+        if produce_eager_tensors:
+            return tf.sparse.SparseTensor(
+                indices=tf.convert_to_tensor(coo_np),
+                dense_shape=tf.convert_to_tensor(dense_shape, dtype=tf.int64),
+                values=tf.convert_to_tensor(values_np),
+            )
+        return tf.compat.v1.SparseTensorValue(
+            indices=coo_np, dense_shape=dense_shape, values=values_np
+        )
+
+    @staticmethod
+    def CanHandle(
+        arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation
+    ) -> bool:
+        """Returns whether `tensor_representation` can be handled."""
+        sparse_representation = tensor_representation.sparse_tensor
+        if len(sparse_representation.dense_shape.dim) != len(
+            sparse_representation.index_column_names
+        ):
+            return False
 
-class _RaggedTensorHandler(_TypeHandler):
-  """Handles conversion to RaggedTensors."""
-
-  __slots__ = [
-      "_column_index",
-      "_value_path",
-      "_dtype",
-      "_row_partition_dtype",
-      "_convert_to_binary_fn",
-      "_inner_fixed_shape",
-      "_values_fixed_shape",
-      "_inferred_dimensions_elements",
-      "_outer_ragged_rank",
-      "_ragged_partitions",
-      "_fixed_dimension_partitions",
-  ]
-
-  def __init__(self, arrow_schema: pa.Schema,
-               tensor_representation: schema_pb2.TensorRepresentation):
-    super().__init__(arrow_schema, tensor_representation)
-    ragged_representation = tensor_representation.ragged_tensor
-
-    self._value_path = path.ColumnPath.from_proto(
-        ragged_representation.feature_path)
-    self._column_index = arrow_schema.get_field_index(
-        ragged_representation.feature_path.step[0])
-    self._outer_ragged_rank, value_type = _GetNestDepthAndValueType(
-        arrow_schema, self._value_path)
-
-    # Split partitions to the ones defining Ragged dimensions and the ones
-    # defining the outer dimensions shape (through uniform row length
-    # partitions).
-    fixed_dimension = True
-    ragged_partitions = []
-    fixed_dimension_partitions = []
-    # Reverse through the partitions (from outer partition to inner), in order
-    # to extract the inner fixed shape of the resulting RaggedTensor.
-    for partition in reversed(ragged_representation.partition):
-      if partition.HasField("uniform_row_length") and fixed_dimension:
-        fixed_dimension_partitions.append(partition)
-      else:
-        fixed_dimension = False
-        ragged_partitions.append(partition)
-    self._ragged_partitions = ragged_partitions[::-1]
-    self._fixed_dimension_partitions = fixed_dimension_partitions[::-1]
-
-    inner_fixed_shape = []
-    inferred_dimensions_elements = 1
-    for partition in self._fixed_dimension_partitions:
-      inner_fixed_shape.append(partition.uniform_row_length)
-      inferred_dimensions_elements *= partition.uniform_row_length
-    self._inner_fixed_shape = inner_fixed_shape
-    self._values_fixed_shape = [-1] + inner_fixed_shape
-    self._inferred_dimensions_elements = inferred_dimensions_elements
-
-    self._dtype = _ArrowTypeToTfDtype(value_type)
-    self._row_partition_dtype = ragged_representation.row_partition_dtype
-    self._convert_to_binary_fn = _GetConvertToBinaryFn(value_type)
-
-  @property
-  def type_spec(self) -> tf.TypeSpec:
-    row_splits_dtype = tf.int64
-    if (self._row_partition_dtype ==
-        schema_pb2.TensorRepresentation.RowPartitionDType.INT32):
-      row_splits_dtype = tf.int32
-    ragged_rank = self._outer_ragged_rank + len(self._ragged_partitions)
-    shape = [None] * (ragged_rank + 1) + self._inner_fixed_shape
-    return typing.cast(
-        tf.TypeSpec,
-        tf.RaggedTensorSpec(
-            shape,
-            self._dtype,
-            ragged_rank=ragged_rank,
-            row_splits_dtype=row_splits_dtype))
-
-  def GetTensor(self, record_batch: pa.RecordBatch,
-                produce_eager_tensors: bool) -> Union[np.ndarray, tf.Tensor]:
-    if (self._row_partition_dtype ==
-        schema_pb2.TensorRepresentation.RowPartitionDType.INT32):
-      offsets_dtype = np.int32
-    elif (self._row_partition_dtype ==
-          schema_pb2.TensorRepresentation.RowPartitionDType.INT64 or
-          self._row_partition_dtype ==
-          schema_pb2.TensorRepresentation.RowPartitionDType.UNSPECIFIED):
-      offsets_dtype = np.int64
-
-    if produce_eager_tensors:
-      # Skip expensive validation since it's entirely dependent on the
-      # implementation correctness given that the input RecordBatch is valid.
-      factory = functools.partial(
-          tf.RaggedTensor.from_row_splits, validate=False)
-    else:
-      factory = tf.compat.v1.ragged.RaggedTensorValue
-
-    # A RaggedTensor is composed by the following dimensions:
-    # [B, D_0, D_1, ..., D_N, P_0, P_1, ..., P_M, U_0, U_1, ..., U_P]
-    #
-    # These dimensions belong to different categories:
-    # * B: Batch size dimension
-    # * D_n: Dimensions specified by the nested structure from the schema and
-    # the column path to the values. n >= 1.
-    # * P_m: Dimensions specified by the partitions that do not specify a fixed
-    # dimension size. m >= 0.
-    # * U_p: Dimensions specified by the inner uniform row length partitions
-    # that make the inner dimensions fixed. p>=0.
-
-    # Get row splits of each level in the record batch.
-    # Store the row splits for the Dn dimensions that store the representation
-    # of the nested structure on the dataset schema.
-    outer_row_splits = []
-
-    column_path = self._value_path.suffix(1)
-    column = record_batch.column(self._column_index)
-    column_type = column.type
-    # Keep track of an accessor for the parent struct, so we can access other
-    # fields required to get future dimensions row splits.
-    parent_field_accessor = lambda field: record_batch.column(  # pylint:disable=g-long-lambda
-        record_batch.schema.get_field_index(field))
+        # All the index columns must be of integral types.
+        for index_column in sparse_representation.index_column_names:
+            depth, value_type = _GetNestDepthAndValueType(
+                arrow_schema, path.ColumnPath(index_column)
+            )
+            if depth != 1 or not pa.types.is_integer(value_type):
+                return False
 
-    while True:
-      # TODO(b/156514075): add support for handling slices.
-      if column.offset != 0:
-        raise ValueError(
-            "This record batch is sliced. We currently do not handle converting"
-            " slices to RaggedTensors.")
-      if pa.types.is_struct(column_type):
-        parent_column = column
-        parent_field_accessor = parent_column.field
-        column = column.field(column_path.initial_step())
-        column_path = column_path.suffix(1)
-        column_type = column.type
-      elif _IsListLike(column_type):
-        # Note that we are using raw offsets and values assuming that the array
-        # is not sliced (validated above) and there is no null elements backed
-        # by non-empty lists (too expensive to validate).
-        outer_row_splits.append(np.asarray(column.offsets, dtype=offsets_dtype))
-        column = column.values
-        column_type = column.type
-      else:
-        break
-
-    # Now that we have stored the row splits for the Dn dimensions, lets
-    # start the construction of the RaggedTensor from the inner dimensions to
-    # the outermost.
-
-    # Take the values and set the shape for the inner most dimensions (Up)
-    if self._convert_to_binary_fn is not None:
-      column = self._convert_to_binary_fn(column)
-    ragged_tensor = np.reshape(np.asarray(column), self._values_fixed_shape)
-
-    # Build the RaggedTensor from the values and the specified partitions.
-
-    # Now iterate from inner most partitions to outermost.
-    # But first we need pop the last row split from the outer dimensions (D_n)
-    # and scale it given the number of elements in the inner fixed dimensions.
-    try:
-      outer_last_row_split = _FloorDivide(outer_row_splits.pop(),
-                                          self._inferred_dimensions_elements)
-    except RuntimeError as e:
-      raise ValueError(
-          ("The values features lenghts cannot support "
-           "the claimed fixed shape {}").format(self._inner_fixed_shape)) from e
-
-    # Keep track of the previous dimension to help building row splits when an
-    # uniform row length partition is found.
-    prev_dimension = ragged_tensor.shape[0]
-    for partition in reversed(self._ragged_partitions):
-      if partition.HasField("uniform_row_length"):
-        # If a uniform row length partition is found, we need to scale down the
-        # last outer dimension row split.
-        try:
-          outer_last_row_split = _FloorDivide(outer_last_row_split,
-                                              partition.uniform_row_length)
-        except RuntimeError as e:
-          raise ValueError(("The values features lengths cannnot support the "
-                            "specified uniform row length of size {}").format(
-                                partition.uniform_row_length)) from e
+        depth, value_type = _GetNestDepthAndValueType(
+            arrow_schema, path.ColumnPath(sparse_representation.value_column_name)
+        )
+        return depth == 1 and _IsSupportedArrowValueType(value_type)
 
-        row_splits = np.arange(
-            0,
-            prev_dimension + 1,
-            partition.uniform_row_length,
-            dtype=offsets_dtype)
 
-        ragged_tensor = factory(ragged_tensor, row_splits=row_splits)
+class _RaggedTensorHandler(_TypeHandler):
+    """Handles conversion to RaggedTensors."""
+
+    __slots__ = [
+        "_column_index",
+        "_value_path",
+        "_dtype",
+        "_row_partition_dtype",
+        "_convert_to_binary_fn",
+        "_inner_fixed_shape",
+        "_values_fixed_shape",
+        "_inferred_dimensions_elements",
+        "_outer_ragged_rank",
+        "_ragged_partitions",
+        "_fixed_dimension_partitions",
+    ]
+
+    def __init__(
+        self,
+        arrow_schema: pa.Schema,
+        tensor_representation: schema_pb2.TensorRepresentation,
+    ):
+        super().__init__(arrow_schema, tensor_representation)
+        ragged_representation = tensor_representation.ragged_tensor
+
+        self._value_path = path.ColumnPath.from_proto(
+            ragged_representation.feature_path
+        )
+        self._column_index = arrow_schema.get_field_index(
+            ragged_representation.feature_path.step[0]
+        )
+        self._outer_ragged_rank, value_type = _GetNestDepthAndValueType(
+            arrow_schema, self._value_path
+        )
+
+        # Split partitions to the ones defining Ragged dimensions and the ones
+        # defining the outer dimensions shape (through uniform row length
+        # partitions).
+        fixed_dimension = True
+        ragged_partitions = []
+        fixed_dimension_partitions = []
+        # Reverse through the partitions (from outer partition to inner), in order
+        # to extract the inner fixed shape of the resulting RaggedTensor.
+        for partition in reversed(ragged_representation.partition):
+            if partition.HasField("uniform_row_length") and fixed_dimension:
+                fixed_dimension_partitions.append(partition)
+            else:
+                fixed_dimension = False
+                ragged_partitions.append(partition)
+        self._ragged_partitions = ragged_partitions[::-1]
+        self._fixed_dimension_partitions = fixed_dimension_partitions[::-1]
+
+        inner_fixed_shape = []
+        inferred_dimensions_elements = 1
+        for partition in self._fixed_dimension_partitions:
+            inner_fixed_shape.append(partition.uniform_row_length)
+            inferred_dimensions_elements *= partition.uniform_row_length
+        self._inner_fixed_shape = inner_fixed_shape
+        self._values_fixed_shape = [-1] + inner_fixed_shape
+        self._inferred_dimensions_elements = inferred_dimensions_elements
+
+        self._dtype = _ArrowTypeToTfDtype(value_type)
+        self._row_partition_dtype = ragged_representation.row_partition_dtype
+        self._convert_to_binary_fn = _GetConvertToBinaryFn(value_type)
+
+    @property
+    def type_spec(self) -> tf.TypeSpec:
+        row_splits_dtype = tf.int64
+        if (
+            self._row_partition_dtype
+            == schema_pb2.TensorRepresentation.RowPartitionDType.INT32
+        ):
+            row_splits_dtype = tf.int32
+        ragged_rank = self._outer_ragged_rank + len(self._ragged_partitions)
+        shape = [None] * (ragged_rank + 1) + self._inner_fixed_shape
+        return typing.cast(
+            tf.TypeSpec,
+            tf.RaggedTensorSpec(
+                shape,
+                self._dtype,
+                ragged_rank=ragged_rank,
+                row_splits_dtype=row_splits_dtype,
+            ),
+        )
+
+    def GetTensor(
+        self, record_batch: pa.RecordBatch, produce_eager_tensors: bool
+    ) -> Union[np.ndarray, tf.Tensor]:
+        if (
+            self._row_partition_dtype
+            == schema_pb2.TensorRepresentation.RowPartitionDType.INT32
+        ):
+            offsets_dtype = np.int32
+        elif (
+            self._row_partition_dtype
+            == schema_pb2.TensorRepresentation.RowPartitionDType.INT64
+            or self._row_partition_dtype
+            == schema_pb2.TensorRepresentation.RowPartitionDType.UNSPECIFIED
+        ):
+            offsets_dtype = np.int64
+
+        if produce_eager_tensors:
+            # Skip expensive validation since it's entirely dependent on the
+            # implementation correctness given that the input RecordBatch is valid.
+            factory = functools.partial(tf.RaggedTensor.from_row_splits, validate=False)
+        else:
+            factory = tf.compat.v1.ragged.RaggedTensorValue
+
+        # A RaggedTensor is composed by the following dimensions:
+        # [B, D_0, D_1, ..., D_N, P_0, P_1, ..., P_M, U_0, U_1, ..., U_P]
+        #
+        # These dimensions belong to different categories:
+        # * B: Batch size dimension
+        # * D_n: Dimensions specified by the nested structure from the schema and
+        # the column path to the values. n >= 1.
+        # * P_m: Dimensions specified by the partitions that do not specify a fixed
+        # dimension size. m >= 0.
+        # * U_p: Dimensions specified by the inner uniform row length partitions
+        # that make the inner dimensions fixed. p>=0.
+
+        # Get row splits of each level in the record batch.
+        # Store the row splits for the Dn dimensions that store the representation
+        # of the nested structure on the dataset schema.
+        outer_row_splits = []
+
+        column_path = self._value_path.suffix(1)
+        column = record_batch.column(self._column_index)
+        column_type = column.type
+        # Keep track of an accessor for the parent struct, so we can access other
+        # fields required to get future dimensions row splits.
+        parent_field_accessor = lambda field: record_batch.column(  # pylint:disable=g-long-lambda
+            record_batch.schema.get_field_index(field)
+        )
+
+        while True:
+            # TODO(b/156514075): add support for handling slices.
+            if column.offset != 0:
+                raise ValueError(
+                    "This record batch is sliced. We currently do not handle converting"
+                    " slices to RaggedTensors."
+                )
+            if pa.types.is_struct(column_type):
+                parent_column = column
+                parent_field_accessor = parent_column.field
+                column = column.field(column_path.initial_step())
+                column_path = column_path.suffix(1)
+                column_type = column.type
+            elif _IsListLike(column_type):
+                # Note that we are using raw offsets and values assuming that the array
+                # is not sliced (validated above) and there is no null elements backed
+                # by non-empty lists (too expensive to validate).
+                outer_row_splits.append(np.asarray(column.offsets, dtype=offsets_dtype))
+                column = column.values
+                column_type = column.type
+            else:
+                break
+
+        # Now that we have stored the row splits for the Dn dimensions, lets
+        # start the construction of the RaggedTensor from the inner dimensions to
+        # the outermost.
+
+        # Take the values and set the shape for the inner most dimensions (Up)
+        if self._convert_to_binary_fn is not None:
+            column = self._convert_to_binary_fn(column)
+        ragged_tensor = np.reshape(np.asarray(column), self._values_fixed_shape)
+
+        # Build the RaggedTensor from the values and the specified partitions.
+
+        # Now iterate from inner most partitions to outermost.
+        # But first we need pop the last row split from the outer dimensions (D_n)
+        # and scale it given the number of elements in the inner fixed dimensions.
         try:
-          prev_dimension = _FloorDivide(prev_dimension,
-                                        partition.uniform_row_length)
+            outer_last_row_split = _FloorDivide(
+                outer_row_splits.pop(), self._inferred_dimensions_elements
+            )
         except RuntimeError as e:
-          raise ValueError(
-              ("The previous ragged partitions contained {} elements, "
-               "which are not valid with the specified uniform row length: {}"
-              ).format(prev_dimension, partition.uniform_row_length)) from e
-
-      elif partition.HasField("row_length"):
-        row_length_array = parent_field_accessor(partition.row_length)
-
-        # When the outer most dimension specified by the partitions (P_0) comes
-        # from another array other than values, we need to update the last
-        # dimension row splits defined by the nested structure (D_n) given the
-        # offsets of the array.
-        outer_last_row_split = np.asarray(
-            row_length_array.offsets, dtype=offsets_dtype)
-
-        # Build row splits.
-        row_length = np.asarray(row_length_array.flatten())
-        row_splits = np.zeros(len(row_length) + 1, dtype=offsets_dtype)
-        np.cumsum(row_length, out=row_splits[1:])
-
-        if prev_dimension != row_splits[-1]:
-          raise ValueError(
-              ("The sum of row lengts provided in '{}' do not match "
-               "with previous dimension found {}.").format(
-                   partition.row_length, prev_dimension))
-
-        ragged_tensor = factory(ragged_tensor, row_splits=row_splits)
-        prev_dimension = len(row_length)
-
-      else:
-        raise ValueError("Empty partition found.")
-
-    # Add back the last row split from the outer dimensions (D_n).
-    outer_row_splits.append(outer_last_row_split)
-
-    # Apply the outer ragged dimensions to thre resulting tensor.
-    # Now that the RaggedTensor is build up to the P_0 dimensions, we need to
-    # specify the row splits for the D_n dimensions.
-    for row_split in reversed(outer_row_splits):
-      ragged_tensor = factory(ragged_tensor, row_splits=row_split)
-
-    return ragged_tensor
-
-  @staticmethod
-  def CanHandle(arrow_schema: pa.Schema,
-                tensor_representation: schema_pb2.TensorRepresentation) -> bool:
-    """Returns whether `tensor_representation` can be handled.
-
-    The case where the tensor_representation cannot be handled is when:
-    1. Wrong column name / field name requested.
-    2. Non-leaf field is requested (for StructTypes).
-    3. There does not exist a ListType along the path.
-    4. Requested partitions paths are not an integer values or doesn't exist.
-
-    Args:
-      arrow_schema: The pyarrow schema.
-      tensor_representation: The TensorRepresentation proto.
-    """
-    ragged_tensor = tensor_representation.ragged_tensor
-    if len(ragged_tensor.feature_path.step) < 1:
-      return False
+            raise ValueError(
+                "The values features lenghts cannot support "
+                f"the claimed fixed shape {self._inner_fixed_shape}"
+            ) from e
+
+        # Keep track of the previous dimension to help building row splits when an
+        # uniform row length partition is found.
+        prev_dimension = ragged_tensor.shape[0]
+        for partition in reversed(self._ragged_partitions):
+            if partition.HasField("uniform_row_length"):
+                # If a uniform row length partition is found, we need to scale down the
+                # last outer dimension row split.
+                try:
+                    outer_last_row_split = _FloorDivide(
+                        outer_last_row_split, partition.uniform_row_length
+                    )
+                except RuntimeError as e:
+                    raise ValueError(
+                        "The values features lengths cannnot support the "
+                        f"specified uniform row length of size {partition.uniform_row_length}"
+                    ) from e
+
+                row_splits = np.arange(
+                    0,
+                    prev_dimension + 1,
+                    partition.uniform_row_length,
+                    dtype=offsets_dtype,
+                )
+
+                ragged_tensor = factory(ragged_tensor, row_splits=row_splits)
+                try:
+                    prev_dimension = _FloorDivide(
+                        prev_dimension, partition.uniform_row_length
+                    )
+                except RuntimeError as e:
+                    raise ValueError(
+                        f"The previous ragged partitions contained {prev_dimension} elements, "
+                        f"which are not valid with the specified uniform row length: {partition.uniform_row_length}"
+                    ) from e
+
+            elif partition.HasField("row_length"):
+                row_length_array = parent_field_accessor(partition.row_length)
+
+                # When the outer most dimension specified by the partitions (P_0) comes
+                # from another array other than values, we need to update the last
+                # dimension row splits defined by the nested structure (D_n) given the
+                # offsets of the array.
+                outer_last_row_split = np.asarray(
+                    row_length_array.offsets, dtype=offsets_dtype
+                )
+
+                # Build row splits.
+                row_length = np.asarray(row_length_array.flatten())
+                row_splits = np.zeros(len(row_length) + 1, dtype=offsets_dtype)
+                np.cumsum(row_length, out=row_splits[1:])
+
+                if prev_dimension != row_splits[-1]:
+                    raise ValueError(
+                        f"The sum of row lengts provided in '{partition.row_length}' do not match "
+                        f"with previous dimension found {prev_dimension}."
+                    )
+
+                ragged_tensor = factory(ragged_tensor, row_splits=row_splits)
+                prev_dimension = len(row_length)
+
+            else:
+                raise ValueError("Empty partition found.")
+
+        # Add back the last row split from the outer dimensions (D_n).
+        outer_row_splits.append(outer_last_row_split)
+
+        # Apply the outer ragged dimensions to thre resulting tensor.
+        # Now that the RaggedTensor is build up to the P_0 dimensions, we need to
+        # specify the row splits for the D_n dimensions.
+        for row_split in reversed(outer_row_splits):
+            ragged_tensor = factory(ragged_tensor, row_splits=row_split)
+
+        return ragged_tensor
+
+    @staticmethod
+    def CanHandle(
+        arrow_schema: pa.Schema, tensor_representation: schema_pb2.TensorRepresentation
+    ) -> bool:
+        """Returns whether `tensor_representation` can be handled.
+
+        The case where the tensor_representation cannot be handled is when:
+        1. Wrong column name / field name requested.
+        2. Non-leaf field is requested (for StructTypes).
+        3. There does not exist a ListType along the path.
+        4. Requested partitions paths are not an integer values or doesn't exist.
+
+        Args:
+        ----
+          arrow_schema: The pyarrow schema.
+          tensor_representation: The TensorRepresentation proto.
+        """
+        ragged_tensor = tensor_representation.ragged_tensor
+        if len(ragged_tensor.feature_path.step) < 1:
+            return False
 
-    value_path = path.ColumnPath.from_proto(ragged_tensor.feature_path)
+        value_path = path.ColumnPath.from_proto(ragged_tensor.feature_path)
 
-    # Checking the outer dimensions represented by the value feature path.
-    contains_list = False
-    try:
-      arrow_type = None
-      for arrow_type in _EnumerateTypesAlongPath(arrow_schema, value_path):
-        if _IsListLike(arrow_type):
-          contains_list = True
-      if pa.types.is_struct(arrow_type):
-        # The path is depleted, but the last arrow_type is a struct. This means
-        # the path is a Non-leaf field.
-        return False
-    except ValueError:
-      # ValueError signifies wrong column name / field name requested.
-      return False
-    if not contains_list:
-      return False
-
-    # Check the auxiliar features that need to be accessed to form the inner
-    # dimensions partitions.
-    parent_path = value_path.parent()
-
-    # Check the columns exists and have correct depth and type.
-    for partition in ragged_tensor.partition:
-      if partition.HasField("row_length"):
+        # Checking the outer dimensions represented by the value feature path.
+        contains_list = False
         try:
-          field_path = parent_path.child(partition.row_length)
-          # To avoid loop undefined variable lint error.
-          partition_type = arrow_schema.field(field_path.initial_step()).type
-          for partition_type in _EnumerateTypesAlongPath(
-              arrow_schema, field_path, stop_at_path_end=True):
-            # Iterate through them all. Only interested on the last type.
-            pass
-          if not _IsListLike(partition_type) or not pa.types.is_integer(
-              partition_type.value_type):
-            return False
+            arrow_type = None
+            for arrow_type in _EnumerateTypesAlongPath(arrow_schema, value_path):
+                if _IsListLike(arrow_type):
+                    contains_list = True
+            if pa.types.is_struct(arrow_type):
+                # The path is depleted, but the last arrow_type is a struct. This means
+                # the path is a Non-leaf field.
+                return False
         except ValueError:
-          # ValueError signifies wrong column name / field name requested.
-          return False
-
-      elif partition.HasField("uniform_row_length"):
-        if partition.uniform_row_length <= 0:
-          return False
-      else:
-        return False
+            # ValueError signifies wrong column name / field name requested.
+            return False
+        if not contains_list:
+            return False
 
-    # All checks passed successfully.
-    return True
+        # Check the auxiliar features that need to be accessed to form the inner
+        # dimensions partitions.
+        parent_path = value_path.parent()
+
+        # Check the columns exists and have correct depth and type.
+        for partition in ragged_tensor.partition:
+            if partition.HasField("row_length"):
+                try:
+                    field_path = parent_path.child(partition.row_length)
+                    # To avoid loop undefined variable lint error.
+                    partition_type = arrow_schema.field(field_path.initial_step()).type
+                    for partition_type in _EnumerateTypesAlongPath(
+                        arrow_schema, field_path, stop_at_path_end=True
+                    ):
+                        # Iterate through them all. Only interested on the last type.
+                        pass
+                    if not _IsListLike(partition_type) or not pa.types.is_integer(
+                        partition_type.value_type
+                    ):
+                        return False
+                except ValueError:
+                    # ValueError signifies wrong column name / field name requested.
+                    return False
+
+            elif partition.HasField("uniform_row_length"):
+                if partition.uniform_row_length <= 0:
+                    return False
+            else:
+                return False
+
+        # All checks passed successfully.
+        return True
 
 
 # Mapping from TensorRepresentation's "kind" oneof field name to TypeHandler
@@ -790,191 +874,206 @@ def CanHandle(arrow_schema: pa.Schema,
 
 def _BuildTypeHandlers(
     tensor_representations: Dict[str, schema_pb2.TensorRepresentation],
-    arrow_schema: pa.Schema) -> List[Tuple[str, _TypeHandler]]:
-  """Builds type handlers according to TensorRepresentations."""
-  result = []
-  for tensor_name, rep in tensor_representations.items():
-    potential_handlers = _TYPE_HANDLER_MAP.get(rep.WhichOneof("kind"))
-    if not potential_handlers:
-      raise ValueError("Unable to handle tensor {} with rep {}".format(
-          tensor_name, rep))
-    found_handler = False
-    for h in potential_handlers:
-      if h.CanHandle(arrow_schema, rep):
-        found_handler = True
-        result.append((tensor_name, h(arrow_schema, rep)))
-        break
-    if not found_handler:
-      raise ValueError("Unable to handle tensor {} with rep {} "
-                       "against schema: {}".format(tensor_name, rep,
-                                                   arrow_schema))
-
-  return result
+    arrow_schema: pa.Schema,
+) -> List[Tuple[str, _TypeHandler]]:
+    """Builds type handlers according to TensorRepresentations."""
+    result = []
+    for tensor_name, rep in tensor_representations.items():
+        potential_handlers = _TYPE_HANDLER_MAP.get(rep.WhichOneof("kind"))
+        if not potential_handlers:
+            raise ValueError(f"Unable to handle tensor {tensor_name} with rep {rep}")
+        found_handler = False
+        for h in potential_handlers:
+            if h.CanHandle(arrow_schema, rep):
+                found_handler = True
+                result.append((tensor_name, h(arrow_schema, rep)))
+                break
+        if not found_handler:
+            raise ValueError(
+                f"Unable to handle tensor {tensor_name} with rep {rep} "
+                f"against schema: {arrow_schema}"
+            )
+
+    return result
 
 
 def _IsListLike(arrow_type: pa.DataType) -> bool:
-  return pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type)
+    return pa.types.is_list(arrow_type) or pa.types.is_large_list(arrow_type)
 
 
 def _GetNestDepthAndValueType(
+    arrow_schema: pa.Schema, column_path: path.ColumnPath
+) -> Tuple[int, pa.DataType]:
+    """Returns the depth of a leaf field, and its innermost value type.
+
+    The Depth is constituted by the number of nested lists in the leaf field.
+
+    Args:
+    ----
+      arrow_schema: The arrow schema to traverse.
+      column_path: A path of field names. The path must describe a leaf struct.
+    Returns: A Tuple of depth and arrow type
+    """
+    arrow_type = arrow_schema.field(column_path.steps()[0]).type
+    depth = 0
+
+    for arrow_type in _EnumerateTypesAlongPath(arrow_schema, column_path):
+        if _IsListLike(arrow_type):
+            depth += 1
+
+    return depth, arrow_type
+
+
+def _EnumerateTypesAlongPath(
     arrow_schema: pa.Schema,
-    column_path: path.ColumnPath) -> Tuple[int, pa.DataType]:
-  """Returns the depth of a leaf field, and its innermost value type.
-
-  The Depth is constituted by the number of nested lists in the leaf field.
-
-  Args:
-    arrow_schema: The arrow schema to traverse.
-    column_path: A path of field names. The path must describe a leaf struct.
-  Returns: A Tuple of depth and arrow type
-  """
-  arrow_type = arrow_schema.field(column_path.steps()[0]).type
-  depth = 0
-
-  for arrow_type in _EnumerateTypesAlongPath(arrow_schema, column_path):
-    if _IsListLike(arrow_type):
-      depth += 1
-
-  return depth, arrow_type
-
-
-def _EnumerateTypesAlongPath(arrow_schema: pa.Schema,
-                             column_path: path.ColumnPath,
-                             stop_at_path_end: bool = False) -> pa.DataType:
-  """Enumerates nested types along a column_path.
-
-  A nested type is either a list-like type or a struct type.
-
-  It uses `column_path`[0] to first address a field in the schema, and
-  enumerates its type. If that type is nested, it enumerates its child and
-  continues recursively until the column_path reaches an end. The child of a
-  list-like type is its value type. The child of a struct type is the type of
-  the child field of the name given by the corresponding step in the
-  column_path.
-
-  Args:
-    arrow_schema: The arrow schema to traverse.
-    column_path: A path of field names.
-    stop_at_path_end: Whether to stop enumerating when all paths in the
-      column_path have been visited. This will avoid keep enumerating on lists
-      nesteness.
-
-  Yields:
-    The arrow type of each level in the schema.
-
-  Raises:
-    ValueError: If a step does not exist in the arrow schema.
-    ValueError: If arrow_schema has no more struct fields, but we did not
-                iterate through every field in column_path.
-  """
-  field_name = column_path.initial_step()
-  column_path = column_path.suffix(1)
-
-  arrow_field = arrow_schema.field(field_name)
-  arrow_type = arrow_field.type
-  yield arrow_type
-
-  while True:
-    if stop_at_path_end and not column_path:
-      break
-    if pa.types.is_struct(arrow_type):
-      # get the field from the StructType
-      if not column_path:
-        break
-      curr_field_name = column_path.initial_step()
-      column_path = column_path.suffix(1)
-      try:
-        arrow_field = arrow_type[curr_field_name]
-      except KeyError as e:
-        raise ValueError(
-            "Field '{}' could not be found in the current Struct: '{}'".format(
-                curr_field_name, arrow_type)) from e
-      arrow_type = arrow_field.type
-    elif _IsListLike(arrow_type):
-      arrow_type = arrow_type.value_type
-    else:
-      yield arrow_type
-      if column_path:
-        raise ValueError(
-            "The arrow_schema fields are exhausted, but there are remaining "
-            "fields in the column_path: '{}'".format(column_path))
-      break
+    column_path: path.ColumnPath,
+    stop_at_path_end: bool = False,
+) -> pa.DataType:
+    """Enumerates nested types along a column_path.
+
+    A nested type is either a list-like type or a struct type.
+
+    It uses `column_path`[0] to first address a field in the schema, and
+    enumerates its type. If that type is nested, it enumerates its child and
+    continues recursively until the column_path reaches an end. The child of a
+    list-like type is its value type. The child of a struct type is the type of
+    the child field of the name given by the corresponding step in the
+    column_path.
+
+    Args:
+    ----
+      arrow_schema: The arrow schema to traverse.
+      column_path: A path of field names.
+      stop_at_path_end: Whether to stop enumerating when all paths in the
+        column_path have been visited. This will avoid keep enumerating on lists
+        nesteness.
+
+    Yields:
+    ------
+      The arrow type of each level in the schema.
+
+    Raises:
+    ------
+      ValueError: If a step does not exist in the arrow schema.
+      ValueError: If arrow_schema has no more struct fields, but we did not
+                  iterate through every field in column_path.
+    """
+    field_name = column_path.initial_step()
+    column_path = column_path.suffix(1)
+
+    arrow_field = arrow_schema.field(field_name)
+    arrow_type = arrow_field.type
     yield arrow_type
 
+    while True:
+        if stop_at_path_end and not column_path:
+            break
+        if pa.types.is_struct(arrow_type):
+            # get the field from the StructType
+            if not column_path:
+                break
+            curr_field_name = column_path.initial_step()
+            column_path = column_path.suffix(1)
+            try:
+                arrow_field = arrow_type[curr_field_name]
+            except KeyError as e:
+                raise ValueError(
+                    f"Field '{curr_field_name}' could not be found in the current Struct: '{arrow_type}'"
+                ) from e
+            arrow_type = arrow_field.type
+        elif _IsListLike(arrow_type):
+            arrow_type = arrow_type.value_type
+        else:
+            yield arrow_type
+            if column_path:
+                raise ValueError(
+                    "The arrow_schema fields are exhausted, but there are remaining "
+                    f"fields in the column_path: '{column_path}'"
+                )
+            break
+        yield arrow_type
+
 
 def _IsBinaryLike(arrow_type: pa.DataType) -> bool:
-  return (pa.types.is_binary(arrow_type) or
-          pa.types.is_large_binary(arrow_type) or
-          pa.types.is_string(arrow_type) or
-          pa.types.is_large_string(arrow_type))
+    return (
+        pa.types.is_binary(arrow_type)
+        or pa.types.is_large_binary(arrow_type)
+        or pa.types.is_string(arrow_type)
+        or pa.types.is_large_string(arrow_type)
+    )
 
 
 def _IsSupportedArrowValueType(arrow_type: pa.DataType) -> bool:
-  return (pa.types.is_integer(arrow_type) or pa.types.is_floating(arrow_type) or
-          _IsBinaryLike(arrow_type))
+    return (
+        pa.types.is_integer(arrow_type)
+        or pa.types.is_floating(arrow_type)
+        or _IsBinaryLike(arrow_type)
+    )
 
 
 def _ArrowTypeToTfDtype(arrow_type: pa.DataType) -> tf.DType:
-  # TODO(zhuo): Remove the special handling for LargeString/Binary when
-  # to_pandas_dtype() can handle them.
-  if _IsBinaryLike(arrow_type):
-    return tf.string
-  return tf.dtypes.as_dtype(arrow_type.to_pandas_dtype())
+    # TODO(zhuo): Remove the special handling for LargeString/Binary when
+    # to_pandas_dtype() can handle them.
+    if _IsBinaryLike(arrow_type):
+        return tf.string
+    return tf.dtypes.as_dtype(arrow_type.to_pandas_dtype())
 
 
 def _GetAllowedDefaultValue(
     value_type: pa.DataType,
-    default_value_proto: schema_pb2.TensorRepresentation.DefaultValue
+    default_value_proto: schema_pb2.TensorRepresentation.DefaultValue,
 ) -> Union[int, float, bytes]:
-  """Returns the default value set in DefaultValue proto or raises."""
-  kind = default_value_proto.WhichOneof("kind")
-  if kind in ("int_value", "uint_value") and pa.types.is_integer(value_type):
-    value = getattr(default_value_proto, kind)
-    iinfo = np.iinfo(value_type.to_pandas_dtype())
-    if value <= iinfo.max and value >= iinfo.min:
-      return value
-    else:
-      raise ValueError("Integer default value out of range: {} is set for a "
-                       "{} column".format(value, value_type))
-  elif kind == "float_value" and pa.types.is_floating(value_type):
-    return default_value_proto.float_value
-  elif kind == "bytes_value" and _IsBinaryLike(value_type):
-    return default_value_proto.bytes_value
-
-  raise ValueError(
-      "Incompatible default value: {} is set for a {} column".format(
-          kind, value_type))
+    """Returns the default value set in DefaultValue proto or raises."""
+    kind = default_value_proto.WhichOneof("kind")
+    if kind in ("int_value", "uint_value") and pa.types.is_integer(value_type):
+        value = getattr(default_value_proto, kind)
+        iinfo = np.iinfo(value_type.to_pandas_dtype())
+        if value <= iinfo.max and value >= iinfo.min:
+            return value
+        else:
+            raise ValueError(
+                f"Integer default value out of range: {value} is set for a "
+                f"{value_type} column"
+            )
+    elif kind == "float_value" and pa.types.is_floating(value_type):
+        return default_value_proto.float_value
+    elif kind == "bytes_value" and _IsBinaryLike(value_type):
+        return default_value_proto.bytes_value
+
+    raise ValueError(
+        f"Incompatible default value: {kind} is set for a {value_type} column"
+    )
 
 
 def _GetDefaultFill(
-    unbatched_shape: List[int], value_type: pa.DataType,
-    default_value_proto: schema_pb2.TensorRepresentation.DefaultValue
+    unbatched_shape: List[int],
+    value_type: pa.DataType,
+    default_value_proto: schema_pb2.TensorRepresentation.DefaultValue,
 ) -> pa.Array:
-  """Returns an Array full of the default value given in the proto."""
-
-  size = int(np.prod(unbatched_shape, initial=1))
-  return pa.array(
-      [_GetAllowedDefaultValue(value_type, default_value_proto)] * size,
-      type=value_type)
+    """Returns an Array full of the default value given in the proto."""
+    size = int(np.prod(unbatched_shape, initial=1))
+    return pa.array(
+        [_GetAllowedDefaultValue(value_type, default_value_proto)] * size,
+        type=value_type,
+    )
 
 
 def _GetConvertToBinaryFn(
-    array_type: pa.DataType) -> Optional[Callable[[pa.Array], pa.Array]]:
-  """Returns a function that converts a StringArray to BinaryArray."""
-
-  if pa.types.is_string(array_type):
-    return lambda array: array.view(pa.binary())
-  if pa.types.is_large_string(array_type):
-    return lambda array: array.view(pa.large_binary())
-  return None
+    array_type: pa.DataType,
+) -> Optional[Callable[[pa.Array], pa.Array]]:
+    """Returns a function that converts a StringArray to BinaryArray."""
+    if pa.types.is_string(array_type):
+        return lambda array: array.view(pa.binary())
+    if pa.types.is_large_string(array_type):
+        return lambda array: array.view(pa.large_binary())
+    return None
 
 
 def _FloorDivide(array, num_elements: int):
-  # The most common trivial case can avoid producing new arrays.
-  if num_elements == 1:
-    return array
-  result, remainder = np.divmod(array, num_elements)
-  if not np.all(remainder == 0):
-    raise RuntimeError(
-        "Remainder found when dividing array with {}.".format(num_elements))
-  return result
+    # The most common trivial case can avoid producing new arrays.
+    if num_elements == 1:
+        return array
+    result, remainder = np.divmod(array, num_elements)
+    if not np.all(remainder == 0):
+        raise RuntimeError(f"Remainder found when dividing array with {num_elements}.")
+    return result
diff --git a/tfx_bsl/tfxio/tensor_adapter_test.py b/tfx_bsl/tfxio/tensor_adapter_test.py
index 66c834ce..ce32bcef 100644
--- a/tfx_bsl/tfxio/tensor_adapter_test.py
+++ b/tfx_bsl/tfxio/tensor_adapter_test.py
@@ -18,14 +18,15 @@
 import numpy as np
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.tfxio import tensor_adapter
-
+from absl.testing import absltest, parameterized
 from google.protobuf import text_format
-from absl.testing import absltest
-from absl.testing import parameterized
-from tensorflow.python.framework import test_util  # pylint: disable=g-direct-tensorflow-import
+from tensorflow.python.framework import (
+    test_util,  # pylint: disable=g-direct-tensorflow-import
+)
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.tfxio import tensor_adapter
+
 _ALL_SUPPORTED_INT_VALUE_TYPES = [
     pa.int8(),
     pa.int16(),
@@ -38,12 +39,16 @@
 ]
 _ALL_SUPPORTED_FLOATING_VALUE_TYPES = [pa.float32(), pa.float64()]
 _ALL_SUPPORTED_STRING_VALUE_TYPES = [
-    pa.binary(), pa.large_binary(),
-    pa.string(), pa.large_string()
+    pa.binary(),
+    pa.large_binary(),
+    pa.string(),
+    pa.large_string(),
 ]
 _ALL_SUPPORTED_VALUE_TYPES = (
-    _ALL_SUPPORTED_INT_VALUE_TYPES + _ALL_SUPPORTED_FLOATING_VALUE_TYPES +
-    _ALL_SUPPORTED_STRING_VALUE_TYPES)
+    _ALL_SUPPORTED_INT_VALUE_TYPES
+    + _ALL_SUPPORTED_FLOATING_VALUE_TYPES
+    + _ALL_SUPPORTED_STRING_VALUE_TYPES
+)
 _ARROW_TYPE_TO_TF_TYPE = {
     pa.int8(): tf.int8,
     pa.int16(): tf.int16,
@@ -79,8 +84,8 @@
 
 
 def _Make1DSparseTensorTestCases():
-  result = []
-  tensor_representation_textpb = """
+    result = []
+    tensor_representation_textpb = """
   sparse_tensor {
     index_column_names: ["key"]
     value_column_name: "value"
@@ -91,56 +96,59 @@ def _Make1DSparseTensorTestCases():
     }
   }
   """
-  for t in _ALL_SUPPORTED_VALUE_TYPES:
-    for list_type_factory in (("list", pa.list_), ("large_list",
-                                                   pa.large_list)):
-      expected_type_spec = tf.SparseTensorSpec([None, 100],
-                                               _ARROW_TYPE_TO_TF_TYPE[t])
-      if pa.types.is_integer(t):
-        values = [[1, 2], None, [], [3]]
-        expected_values = [1, 2, 3]
-      elif pa.types.is_floating(t):
-        values = [[1.0, 2.0], None, [], [3.0]]
-        expected_values = [1.0, 2.0, 3.0]
-      else:
-        values = [[b"a", b"b"], None, [], [b"c"]]
-        expected_values = [b"a", b"b", b"c"]
-      indices = [[0, 99], None, [], [8]]
-
-      if tf.executing_eagerly():
-        expected_output = tf.sparse.SparseTensor(
-            indices=[[0, 0], [0, 99], [3, 8]],
-            values=tf.constant(
-                expected_values, dtype=_ARROW_TYPE_TO_TF_TYPE[t]),
-            dense_shape=(4, 100))
-      else:
-        expected_output = tf.compat.v1.SparseTensorValue(
-            indices=[[0, 0], [0, 99], [3, 8]],
-            values=np.array(expected_values, _ARROW_TYPE_TO_NP_TYPE[t]),
-            dense_shape=(4, 100))
-
-      result.append({
-          "testcase_name":
-              "1dsparse_tensor_{}_{}".format(t, list_type_factory[0]),
-          "tensor_representation_textpb":
-              tensor_representation_textpb,
-          "record_batch":
-              pa.RecordBatch.from_arrays([
-                  pa.array(indices, type=list_type_factory[1](pa.int64())),
-                  pa.array(values, type=list_type_factory[1](t))
-              ], ["key", "value"]),
-          "expected_output":
-              expected_output,
-          "expected_type_spec":
-              expected_type_spec,
-      })
-
-  return result
+    for t in _ALL_SUPPORTED_VALUE_TYPES:
+        for list_type_factory in (("list", pa.list_), ("large_list", pa.large_list)):
+            expected_type_spec = tf.SparseTensorSpec(
+                [None, 100], _ARROW_TYPE_TO_TF_TYPE[t]
+            )
+            if pa.types.is_integer(t):
+                values = [[1, 2], None, [], [3]]
+                expected_values = [1, 2, 3]
+            elif pa.types.is_floating(t):
+                values = [[1.0, 2.0], None, [], [3.0]]
+                expected_values = [1.0, 2.0, 3.0]
+            else:
+                values = [[b"a", b"b"], None, [], [b"c"]]
+                expected_values = [b"a", b"b", b"c"]
+            indices = [[0, 99], None, [], [8]]
+
+            if tf.executing_eagerly():
+                expected_output = tf.sparse.SparseTensor(
+                    indices=[[0, 0], [0, 99], [3, 8]],
+                    values=tf.constant(
+                        expected_values, dtype=_ARROW_TYPE_TO_TF_TYPE[t]
+                    ),
+                    dense_shape=(4, 100),
+                )
+            else:
+                expected_output = tf.compat.v1.SparseTensorValue(
+                    indices=[[0, 0], [0, 99], [3, 8]],
+                    values=np.array(expected_values, _ARROW_TYPE_TO_NP_TYPE[t]),
+                    dense_shape=(4, 100),
+                )
+
+            result.append(
+                {
+                    "testcase_name": f"1dsparse_tensor_{t}_{list_type_factory[0]}",
+                    "tensor_representation_textpb": tensor_representation_textpb,
+                    "record_batch": pa.RecordBatch.from_arrays(
+                        [
+                            pa.array(indices, type=list_type_factory[1](pa.int64())),
+                            pa.array(values, type=list_type_factory[1](t)),
+                        ],
+                        ["key", "value"],
+                    ),
+                    "expected_output": expected_output,
+                    "expected_type_spec": expected_type_spec,
+                }
+            )
+
+    return result
 
 
 def _MakeDenseTensorFromListArrayTestCases():
-  result = []
-  tensor_representation_textpb = """
+    result = []
+    tensor_representation_textpb = """
   dense_tensor {
     column_name: "input"
     shape {
@@ -150,42 +158,38 @@ def _MakeDenseTensorFromListArrayTestCases():
     }
   }
   """
-  for t in _ALL_SUPPORTED_VALUE_TYPES:
-    for list_type_factory in (("list", pa.list_), ("large_list",
-                                                   pa.large_list)):
-      expected_type_spec = tf.TensorSpec([None, 4], _ARROW_TYPE_TO_TF_TYPE[t])
-
-      if pa.types.is_integer(t):
-        values = [[1, 2, 3, 4], [5, 6, 7, 8]]
-      elif pa.types.is_floating(t):
-        values = [[1.0, 2.0, 4.0, 8.0], [-1.0, -2.0, -4.0, -8.0]]
-      else:
-        values = [[b"a", b"b", b"c", b"d"], [b"e", b"f", b"g", b"h"]]
-
-      arrow_array = pa.array(values, type=list_type_factory[1](t))
-      if tf.executing_eagerly():
-        expected_output = tf.constant(values, dtype=_ARROW_TYPE_TO_TF_TYPE[t])
-      else:
-        expected_output = np.array(values, dtype=_ARROW_TYPE_TO_NP_TYPE[t])
-
-      result.append({
-          "testcase_name":
-              "dense_from_{}_array_{}".format(list_type_factory[0], t),
-          "tensor_representation_textpb":
-              tensor_representation_textpb,
-          "arrow_array":
-              arrow_array,
-          "expected_output":
-              expected_output,
-          "expected_type_spec":
-              expected_type_spec,
-      })
-
-  return result
+    for t in _ALL_SUPPORTED_VALUE_TYPES:
+        for list_type_factory in (("list", pa.list_), ("large_list", pa.large_list)):
+            expected_type_spec = tf.TensorSpec([None, 4], _ARROW_TYPE_TO_TF_TYPE[t])
+
+            if pa.types.is_integer(t):
+                values = [[1, 2, 3, 4], [5, 6, 7, 8]]
+            elif pa.types.is_floating(t):
+                values = [[1.0, 2.0, 4.0, 8.0], [-1.0, -2.0, -4.0, -8.0]]
+            else:
+                values = [[b"a", b"b", b"c", b"d"], [b"e", b"f", b"g", b"h"]]
+
+            arrow_array = pa.array(values, type=list_type_factory[1](t))
+            if tf.executing_eagerly():
+                expected_output = tf.constant(values, dtype=_ARROW_TYPE_TO_TF_TYPE[t])
+            else:
+                expected_output = np.array(values, dtype=_ARROW_TYPE_TO_NP_TYPE[t])
+
+            result.append(
+                {
+                    "testcase_name": f"dense_from_{list_type_factory[0]}_array_{t}",
+                    "tensor_representation_textpb": tensor_representation_textpb,
+                    "arrow_array": arrow_array,
+                    "expected_output": expected_output,
+                    "expected_type_spec": expected_type_spec,
+                }
+            )
+
+    return result
 
 
 def _MakeIntDefaultFilledDenseTensorFromListArrayTestCases():
-  tensor_representation_textpb = """
+    tensor_representation_textpb = """
   dense_tensor {
     column_name: "input"
     shape {
@@ -201,39 +205,39 @@ def _MakeIntDefaultFilledDenseTensorFromListArrayTestCases():
     }
   }
   """
-  result = []
-  for t in _ALL_SUPPORTED_INT_VALUE_TYPES:
-    for list_type_factory in (("list", pa.list_), ("large_list",
-                                                   pa.large_list)):
-      arrow_array = pa.array([None, [1, 2, 3, 4], None],
-                             type=list_type_factory[1](t))
-      if tf.executing_eagerly():
-        expected_output = tf.constant(
-            [[2, 2, 2, 2], [1, 2, 3, 4], [2, 2, 2, 2]],
-            dtype=_ARROW_TYPE_TO_TF_TYPE[t],
-            shape=(3, 2, 2))
-      else:
-        expected_output = np.array([2, 2, 2, 2, 1, 2, 3, 4, 2, 2, 2, 2],
-                                   dtype=_ARROW_TYPE_TO_NP_TYPE[t]).reshape(
-                                       (3, 2, 2))
-      result.append({
-          "testcase_name":
-              "default_filled_dense_from_{}_array_{}".format(
-                  list_type_factory[0], t),
-          "tensor_representation_textpb":
-              tensor_representation_textpb,
-          "arrow_array":
-              arrow_array,
-          "expected_output":
-              expected_output,
-          "expected_type_spec":
-              tf.TensorSpec([None, 2, 2], _ARROW_TYPE_TO_TF_TYPE[t])
-      })
-  return result
+    result = []
+    for t in _ALL_SUPPORTED_INT_VALUE_TYPES:
+        for list_type_factory in (("list", pa.list_), ("large_list", pa.large_list)):
+            arrow_array = pa.array(
+                [None, [1, 2, 3, 4], None], type=list_type_factory[1](t)
+            )
+            if tf.executing_eagerly():
+                expected_output = tf.constant(
+                    [[2, 2, 2, 2], [1, 2, 3, 4], [2, 2, 2, 2]],
+                    dtype=_ARROW_TYPE_TO_TF_TYPE[t],
+                    shape=(3, 2, 2),
+                )
+            else:
+                expected_output = np.array(
+                    [2, 2, 2, 2, 1, 2, 3, 4, 2, 2, 2, 2],
+                    dtype=_ARROW_TYPE_TO_NP_TYPE[t],
+                ).reshape((3, 2, 2))
+            result.append(
+                {
+                    "testcase_name": f"default_filled_dense_from_{list_type_factory[0]}_array_{t}",
+                    "tensor_representation_textpb": tensor_representation_textpb,
+                    "arrow_array": arrow_array,
+                    "expected_output": expected_output,
+                    "expected_type_spec": tf.TensorSpec(
+                        [None, 2, 2], _ARROW_TYPE_TO_TF_TYPE[t]
+                    ),
+                }
+            )
+    return result
 
 
 def _MakeFloatingDefaultFilledDenseTensorFromListArrayTestCases():
-  tensor_representation_textpb = """
+    tensor_representation_textpb = """
   dense_tensor {
     column_name: "input"
     shape {
@@ -249,34 +253,35 @@ def _MakeFloatingDefaultFilledDenseTensorFromListArrayTestCases():
     }
   }
   """
-  result = []
-  for t in _ALL_SUPPORTED_FLOATING_VALUE_TYPES:
-    arrow_array = pa.array([None, [1, 2], None], type=pa.list_(t))
-    if tf.executing_eagerly():
-      expected_output = tf.constant([[-1, -1], [1, 2], [-1, -1]],
-                                    dtype=_ARROW_TYPE_TO_TF_TYPE[t],
-                                    shape=(3, 2, 1))
-    else:
-      expected_output = np.array([-1, -1, 1, 2, -1, -1],
-                                 dtype=_ARROW_TYPE_TO_NP_TYPE[t]).reshape(
-                                     (3, 2, 1))
-    result.append({
-        "testcase_name":
-            "default_filled_dense_from_list_array_{}".format(t),
-        "tensor_representation_textpb":
-            tensor_representation_textpb,
-        "arrow_array":
-            arrow_array,
-        "expected_output":
-            expected_output,
-        "expected_type_spec":
-            tf.TensorSpec([None, 2, 1], dtype=_ARROW_TYPE_TO_TF_TYPE[t])
-    })
-  return result
+    result = []
+    for t in _ALL_SUPPORTED_FLOATING_VALUE_TYPES:
+        arrow_array = pa.array([None, [1, 2], None], type=pa.list_(t))
+        if tf.executing_eagerly():
+            expected_output = tf.constant(
+                [[-1, -1], [1, 2], [-1, -1]],
+                dtype=_ARROW_TYPE_TO_TF_TYPE[t],
+                shape=(3, 2, 1),
+            )
+        else:
+            expected_output = np.array(
+                [-1, -1, 1, 2, -1, -1], dtype=_ARROW_TYPE_TO_NP_TYPE[t]
+            ).reshape((3, 2, 1))
+        result.append(
+            {
+                "testcase_name": f"default_filled_dense_from_list_array_{t}",
+                "tensor_representation_textpb": tensor_representation_textpb,
+                "arrow_array": arrow_array,
+                "expected_output": expected_output,
+                "expected_type_spec": tf.TensorSpec(
+                    [None, 2, 1], dtype=_ARROW_TYPE_TO_TF_TYPE[t]
+                ),
+            }
+        )
+    return result
 
 
 def _MakeStringDefaultFilledDenseTensorFromListArrayTestCases():
-  tensor_representation_textpb = """
+    tensor_representation_textpb = """
   dense_tensor {
     column_name: "input"
     shape {
@@ -286,133 +291,138 @@ def _MakeStringDefaultFilledDenseTensorFromListArrayTestCases():
     }
   }
   """
-  result = []
-  for t in _ALL_SUPPORTED_STRING_VALUE_TYPES:
-    arrow_array = pa.array([None, ["hello"], None], type=pa.list_(t))
-    if tf.executing_eagerly():
-      expected_output = tf.constant(["nil", "hello", "nil"],
-                                    dtype=_ARROW_TYPE_TO_TF_TYPE[t])
-    else:
-      expected_output = np.array([b"nil", b"hello", b"nil"],
-                                 dtype=_ARROW_TYPE_TO_NP_TYPE[t])
-    result.append({
-        "testcase_name": "default_filled_dense_from_list_array_{}".format(t),
-        "tensor_representation_textpb": tensor_representation_textpb,
-        "arrow_array": arrow_array,
-        "expected_output": expected_output,
-        "expected_type_spec": tf.TensorSpec([None], _ARROW_TYPE_TO_TF_TYPE[t])
-    })
-  return result
+    result = []
+    for t in _ALL_SUPPORTED_STRING_VALUE_TYPES:
+        arrow_array = pa.array([None, ["hello"], None], type=pa.list_(t))
+        if tf.executing_eagerly():
+            expected_output = tf.constant(
+                ["nil", "hello", "nil"], dtype=_ARROW_TYPE_TO_TF_TYPE[t]
+            )
+        else:
+            expected_output = np.array(
+                [b"nil", b"hello", b"nil"], dtype=_ARROW_TYPE_TO_NP_TYPE[t]
+            )
+        result.append(
+            {
+                "testcase_name": f"default_filled_dense_from_list_array_{t}",
+                "tensor_representation_textpb": tensor_representation_textpb,
+                "arrow_array": arrow_array,
+                "expected_output": expected_output,
+                "expected_type_spec": tf.TensorSpec([None], _ARROW_TYPE_TO_TF_TYPE[t]),
+            }
+        )
+    return result
 
 
 def _MakeVarLenSparseTensorFromListArrayTestCases():
-  tensor_representation_textpb = """
+    tensor_representation_textpb = """
   varlen_sparse_tensor {
     column_name: "input"
   }
   """
-  result = []
-  for t in _ALL_SUPPORTED_VALUE_TYPES:
-    if pa.types.is_integer(t):
-      values = [[1, 2], None, [3], [], [5]]
-      expected_values = [1, 2, 3, 5]
-    elif pa.types.is_floating(t):
-      values = [[1.0, 2.0], None, [3.0], [], [5.0]]
-      expected_values = [1.0, 2.0, 3.0, 5.0]
-    else:
-      values = [["a", "b"], None, ["c"], [], ["d"]]
-      expected_values = [b"a", b"b", b"c", b"d"]
-    expected_sparse_indices = [[0, 0], [0, 1], [2, 0], [4, 0]]
-    expected_dense_shape = [5, 2]
-    expected_output = tf.compat.v1.SparseTensorValue(
-        indices=np.array(expected_sparse_indices, dtype=np.int64),
-        dense_shape=np.array(expected_dense_shape, dtype=np.int64),
-        values=np.array(expected_values, dtype=_ARROW_TYPE_TO_NP_TYPE[t]))
-    result.append({
-        "testcase_name":
-            "varlen_sparse_from_list_array_{}".format(t),
-        "tensor_representation_textpb":
-            tensor_representation_textpb,
-        "arrow_array":
-            pa.array(values, type=pa.list_(t)),
-        "expected_output":
-            expected_output,
-        "expected_type_spec":
-            tf.SparseTensorSpec(
-                tf.TensorShape([None, None]), _ARROW_TYPE_TO_TF_TYPE[t])
-    })
-
-  return result
+    result = []
+    for t in _ALL_SUPPORTED_VALUE_TYPES:
+        if pa.types.is_integer(t):
+            values = [[1, 2], None, [3], [], [5]]
+            expected_values = [1, 2, 3, 5]
+        elif pa.types.is_floating(t):
+            values = [[1.0, 2.0], None, [3.0], [], [5.0]]
+            expected_values = [1.0, 2.0, 3.0, 5.0]
+        else:
+            values = [["a", "b"], None, ["c"], [], ["d"]]
+            expected_values = [b"a", b"b", b"c", b"d"]
+        expected_sparse_indices = [[0, 0], [0, 1], [2, 0], [4, 0]]
+        expected_dense_shape = [5, 2]
+        expected_output = tf.compat.v1.SparseTensorValue(
+            indices=np.array(expected_sparse_indices, dtype=np.int64),
+            dense_shape=np.array(expected_dense_shape, dtype=np.int64),
+            values=np.array(expected_values, dtype=_ARROW_TYPE_TO_NP_TYPE[t]),
+        )
+        result.append(
+            {
+                "testcase_name": f"varlen_sparse_from_list_array_{t}",
+                "tensor_representation_textpb": tensor_representation_textpb,
+                "arrow_array": pa.array(values, type=pa.list_(t)),
+                "expected_output": expected_output,
+                "expected_type_spec": tf.SparseTensorSpec(
+                    tf.TensorShape([None, None]), _ARROW_TYPE_TO_TF_TYPE[t]
+                ),
+            }
+        )
+
+    return result
 
 
 _ONE_TENSOR_TEST_CASES = (
-    _MakeDenseTensorFromListArrayTestCases() +
-    _MakeIntDefaultFilledDenseTensorFromListArrayTestCases() +
-    _MakeFloatingDefaultFilledDenseTensorFromListArrayTestCases() +
-    _MakeStringDefaultFilledDenseTensorFromListArrayTestCases() +
-    _MakeVarLenSparseTensorFromListArrayTestCases())
+    _MakeDenseTensorFromListArrayTestCases()
+    + _MakeIntDefaultFilledDenseTensorFromListArrayTestCases()
+    + _MakeFloatingDefaultFilledDenseTensorFromListArrayTestCases()
+    + _MakeStringDefaultFilledDenseTensorFromListArrayTestCases()
+    + _MakeVarLenSparseTensorFromListArrayTestCases()
+)
 
 
 def _MakeRaggedTensorDTypesTestCases():
-  result = []
-  tensor_representation_textpb = """
+    result = []
+    tensor_representation_textpb = """
   ragged_tensor {
     feature_path {
       step: "ragged_feature"
     }
   }
   """
-  for t in _ALL_SUPPORTED_VALUE_TYPES:
-    for list_type_factory in (("list", pa.list_), ("large_list",
-                                                   pa.large_list)):
-      expected_type_spec = tf.RaggedTensorSpec([None, None],
-                                               _ARROW_TYPE_TO_TF_TYPE[t],
-                                               ragged_rank=1,
-                                               row_splits_dtype=tf.int64)
-      if pa.types.is_integer(t):
-        values = [[1, 2], None, [], [3]]
-        expected_values = [1, 2, 3]
-      elif pa.types.is_floating(t):
-        values = [[1.0, 2.0], None, [], [3.0]]
-        expected_values = [1.0, 2.0, 3.0]
-      else:
-        values = [[b"a", b"b"], None, [], [b"c"]]
-        expected_values = [b"a", b"b", b"c"]
-      row_splits = np.asarray([0, 2, 2, 2, 3], dtype=np.int64)
-
-      if tf.executing_eagerly():
-        expected_output = tf.RaggedTensor.from_row_splits(
-            values=tf.constant(
-                expected_values, dtype=_ARROW_TYPE_TO_TF_TYPE[t]),
-            row_splits=row_splits)
-      else:
-        expected_output = tf.compat.v1.ragged.RaggedTensorValue(
-            values=np.array(expected_values, _ARROW_TYPE_TO_NP_TYPE[t]),
-            row_splits=row_splits)
-
-      result.append({
-          "testcase_name":
-              "1D_{}_{}".format(t, list_type_factory[0]),
-          "tensor_representation_textpb":
-              tensor_representation_textpb,
-          "record_batch":
-              pa.RecordBatch.from_arrays(
-                  [pa.array(values, type=list_type_factory[1](t))],
-                  ["ragged_feature"]),
-          "expected_ragged_tensor":
-              expected_output,
-          "expected_type_spec":
-              expected_type_spec,
-      })
-
-  return result
-
-
-_RAGGED_TENSOR_TEST_CASES = (
-    _MakeRaggedTensorDTypesTestCases() + [
-        dict(
-            testcase_name="Simple",
-            tensor_representation_textpb="""
+    for t in _ALL_SUPPORTED_VALUE_TYPES:
+        for list_type_factory in (("list", pa.list_), ("large_list", pa.large_list)):
+            expected_type_spec = tf.RaggedTensorSpec(
+                [None, None],
+                _ARROW_TYPE_TO_TF_TYPE[t],
+                ragged_rank=1,
+                row_splits_dtype=tf.int64,
+            )
+            if pa.types.is_integer(t):
+                values = [[1, 2], None, [], [3]]
+                expected_values = [1, 2, 3]
+            elif pa.types.is_floating(t):
+                values = [[1.0, 2.0], None, [], [3.0]]
+                expected_values = [1.0, 2.0, 3.0]
+            else:
+                values = [[b"a", b"b"], None, [], [b"c"]]
+                expected_values = [b"a", b"b", b"c"]
+            row_splits = np.asarray([0, 2, 2, 2, 3], dtype=np.int64)
+
+            if tf.executing_eagerly():
+                expected_output = tf.RaggedTensor.from_row_splits(
+                    values=tf.constant(
+                        expected_values, dtype=_ARROW_TYPE_TO_TF_TYPE[t]
+                    ),
+                    row_splits=row_splits,
+                )
+            else:
+                expected_output = tf.compat.v1.ragged.RaggedTensorValue(
+                    values=np.array(expected_values, _ARROW_TYPE_TO_NP_TYPE[t]),
+                    row_splits=row_splits,
+                )
+
+            result.append(
+                {
+                    "testcase_name": f"1D_{t}_{list_type_factory[0]}",
+                    "tensor_representation_textpb": tensor_representation_textpb,
+                    "record_batch": pa.RecordBatch.from_arrays(
+                        [pa.array(values, type=list_type_factory[1](t))],
+                        ["ragged_feature"],
+                    ),
+                    "expected_ragged_tensor": expected_output,
+                    "expected_type_spec": expected_type_spec,
+                }
+            )
+
+    return result
+
+
+_RAGGED_TENSOR_TEST_CASES = _MakeRaggedTensorDTypesTestCases() + [
+    dict(
+        testcase_name="Simple",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -420,20 +430,24 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT32
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64()))
-            ], ["ragged_feature"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None]),
-                tf.int64,
-                ragged_rank=1,
-                row_splits_dtype=tf.int32),
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=np.asarray([1, 2, 3, 4, 5]),
-                row_splits=np.asarray([0, 1, 1, 2, 5, 5]))),
-        dict(
-            testcase_name="3D",
-            tensor_representation_textpb="""
+        record_batch=pa.RecordBatch.from_arrays(
+            [pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64()))],
+            ["ragged_feature"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None]),
+            tf.int64,
+            ragged_rank=1,
+            row_splits_dtype=tf.int32,
+        ),
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=np.asarray([1, 2, 3, 4, 5]),
+            row_splits=np.asarray([0, 1, 1, 2, 5, 5]),
+        ),
+    ),
+    dict(
+        testcase_name="3D",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -441,23 +455,32 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT32
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[[1]], None, [[2]], [[3, 4], [5]], []],
-                         type=pa.list_(pa.list_(pa.int64())))
-            ], ["ragged_feature"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None]),
-                tf.int64,
-                ragged_rank=2,
-                row_splits_dtype=tf.int32),
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=tf.compat.v1.ragged.RaggedTensorValue(
-                    values=np.asarray([1, 2, 3, 4, 5]),
-                    row_splits=np.asarray([0, 1, 2, 4, 5])),
-                row_splits=np.asarray([0, 1, 1, 2, 4, 4]))),
-        dict(
-            testcase_name="StructOfList",
-            tensor_representation_textpb="""
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[[1]], None, [[2]], [[3, 4], [5]], []],
+                    type=pa.list_(pa.list_(pa.int64())),
+                )
+            ],
+            ["ragged_feature"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None]),
+            tf.int64,
+            ragged_rank=2,
+            row_splits_dtype=tf.int32,
+        ),
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
+                values=np.asarray([1, 2, 3, 4, 5]),
+                row_splits=np.asarray([0, 1, 2, 4, 5]),
+            ),
+            row_splits=np.asarray([0, 1, 1, 2, 4, 4]),
+        ),
+    ),
+    dict(
+        testcase_name="StructOfList",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -466,23 +489,31 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT32
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.StructArray.from_arrays([
-                    pa.array([[1, 2, 3], [4]], pa.list_(pa.int64())),
-                    pa.array([["a", "b", "c"], ["d"]], pa.list_(pa.binary()))
-                ], ["inner_feature", "x2"])
-            ], ["ragged_feature"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None]),
-                tf.int64,
-                ragged_rank=1,
-                row_splits_dtype=tf.int32),
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=np.asarray([1, 2, 3, 4]),
-                row_splits=np.asarray([0, 3, 4]))),
-        dict(
-            testcase_name="ListOfStruct",
-            tensor_representation_textpb="""
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array([[1, 2, 3], [4]], pa.list_(pa.int64())),
+                        pa.array([["a", "b", "c"], ["d"]], pa.list_(pa.binary())),
+                    ],
+                    ["inner_feature", "x2"],
+                )
+            ],
+            ["ragged_feature"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None]),
+            tf.int64,
+            ragged_rank=1,
+            row_splits_dtype=tf.int32,
+        ),
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=np.asarray([1, 2, 3, 4]), row_splits=np.asarray([0, 3, 4])
+        ),
+    ),
+    dict(
+        testcase_name="ListOfStruct",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -491,32 +522,36 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT32
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[{
-                    "inner_feature": 1,
-                    "x2": "a"
-                }, {
-                    "inner_feature": 2,
-                    "x2": "b"
-                }], [{
-                    "inner_feature": 3,
-                    "x2": "c"
-                }]],
-                         pa.list_(
-                             pa.struct([("inner_feature", pa.int64()),
-                                        ("x2", pa.binary())])))
-            ], ["ragged_feature"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None]),
-                tf.int64,
-                ragged_rank=1,
-                row_splits_dtype=tf.int32),
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=np.asarray([1, 2, 3]), row_splits=np.asarray([0, 2, 3
-                                                                    ]))),
-        dict(
-            testcase_name="NestedStructList",
-            tensor_representation_textpb="""
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [
+                        [
+                            {"inner_feature": 1, "x2": "a"},
+                            {"inner_feature": 2, "x2": "b"},
+                        ],
+                        [{"inner_feature": 3, "x2": "c"}],
+                    ],
+                    pa.list_(
+                        pa.struct([("inner_feature", pa.int64()), ("x2", pa.binary())])
+                    ),
+                )
+            ],
+            ["ragged_feature"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None]),
+            tf.int64,
+            ragged_rank=1,
+            row_splits_dtype=tf.int32,
+        ),
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=np.asarray([1, 2, 3]), row_splits=np.asarray([0, 2, 3])
+        ),
+    ),
+    dict(
+        testcase_name="NestedStructList",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -526,35 +561,47 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT32
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.StructArray.from_arrays([
-                    pa.array([[{
-                        "inner_feature_2": "x",
-                        "x2": "a"
-                    }, {
-                        "inner_feature_2": "y",
-                        "x2": "b"
-                    }], [{
-                        "inner_feature_2": "z",
-                        "x2": "c"
-                    }]],
-                             pa.list_(
-                                 pa.struct([("inner_feature_2", pa.binary()),
-                                            ("x3", pa.binary())]))),
-                    pa.array([["a", "b", "c"], ["d"]], pa.list_(pa.binary()))
-                ], ["inner_feature_1", "x2"])
-            ], ["ragged_feature"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None]),
-                tf.string,
-                ragged_rank=1,
-                row_splits_dtype=tf.int32),
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=np.asarray([b"x", b"y", b"z"]),
-                row_splits=np.asarray([0, 2, 3]))),
-        dict(
-            testcase_name="ListStructStruct",
-            tensor_representation_textpb="""
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [
+                                [
+                                    {"inner_feature_2": "x", "x2": "a"},
+                                    {"inner_feature_2": "y", "x2": "b"},
+                                ],
+                                [{"inner_feature_2": "z", "x2": "c"}],
+                            ],
+                            pa.list_(
+                                pa.struct(
+                                    [
+                                        ("inner_feature_2", pa.binary()),
+                                        ("x3", pa.binary()),
+                                    ]
+                                )
+                            ),
+                        ),
+                        pa.array([["a", "b", "c"], ["d"]], pa.list_(pa.binary())),
+                    ],
+                    ["inner_feature_1", "x2"],
+                )
+            ],
+            ["ragged_feature"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None]),
+            tf.string,
+            ragged_rank=1,
+            row_splits_dtype=tf.int32,
+        ),
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=np.asarray([b"x", b"y", b"z"]), row_splits=np.asarray([0, 2, 3])
+        ),
+    ),
+    dict(
+        testcase_name="ListStructStruct",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -564,41 +611,48 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT32
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[{
-                    "inner_feature_1": {
-                        "inner_feature_2": 1,
-                        "x2": "a"
-                    }
-                }, {
-                    "inner_feature_1": {
-                        "inner_feature_2": 2,
-                        "x2": "b"
-                    }
-                }], [{
-                    "inner_feature_1": {
-                        "inner_feature_2": 3,
-                        "x2": "c"
-                    }
-                }]],
-                         pa.list_(
-                             pa.struct([("inner_feature_1",
-                                         pa.struct([
-                                             ("inner_feature_2", pa.int64()),
-                                             ("x2", pa.binary()),
-                                         ]))])))
-            ], ["ragged_feature"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None]),
-                tf.int64,
-                ragged_rank=1,
-                row_splits_dtype=tf.int32),
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=np.asarray([1, 2, 3]), row_splits=np.asarray([0, 2, 3
-                                                                    ]))),
-        dict(
-            testcase_name="MixedLargeTypes",
-            tensor_representation_textpb="""
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [
+                        [
+                            {"inner_feature_1": {"inner_feature_2": 1, "x2": "a"}},
+                            {"inner_feature_1": {"inner_feature_2": 2, "x2": "b"}},
+                        ],
+                        [{"inner_feature_1": {"inner_feature_2": 3, "x2": "c"}}],
+                    ],
+                    pa.list_(
+                        pa.struct(
+                            [
+                                (
+                                    "inner_feature_1",
+                                    pa.struct(
+                                        [
+                                            ("inner_feature_2", pa.int64()),
+                                            ("x2", pa.binary()),
+                                        ]
+                                    ),
+                                )
+                            ]
+                        )
+                    ),
+                )
+            ],
+            ["ragged_feature"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None]),
+            tf.int64,
+            ragged_rank=1,
+            row_splits_dtype=tf.int32,
+        ),
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=np.asarray([1, 2, 3]), row_splits=np.asarray([0, 2, 3])
+        ),
+    ),
+    dict(
+        testcase_name="MixedLargeTypes",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -606,56 +660,67 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[[1]], None, [[2]], [[3, 4], [5]], []],
-                         type=pa.list_(pa.large_list(pa.int64())))
-            ], ["ragged_feature"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None]),
-                tf.int64,
-                ragged_rank=2,
-                row_splits_dtype=tf.int64),
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=tf.compat.v1.ragged.RaggedTensorValue(
-                    values=np.asarray([1, 2, 3, 4, 5]),
-                    row_splits=np.asarray([0, 1, 2, 4, 5])),
-                row_splits=np.asarray([0, 1, 1, 2, 4, 4]))),
-        dict(
-            testcase_name="RaggedRank1Uniform1D",
-            tensor_representation_textpb="""
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[[1]], None, [[2]], [[3, 4], [5]], []],
+                    type=pa.list_(pa.large_list(pa.int64())),
+                )
+            ],
+            ["ragged_feature"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None]),
+            tf.int64,
+            ragged_rank=2,
+            row_splits_dtype=tf.int64,
+        ),
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
+                values=np.asarray([1, 2, 3, 4, 5]),
+                row_splits=np.asarray([0, 1, 2, 4, 5]),
+            ),
+            row_splits=np.asarray([0, 1, 1, 2, 4, 4]),
+        ),
+    ),
+    dict(
+        testcase_name="RaggedRank1Uniform1D",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path { step: "value" }
           partition { uniform_row_length: 2 }
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[1, 2, 3, 4], None, [], [5, 6]],
-                         type=pa.list_(pa.int64()))
-            ], ["value"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, 2]),
-                tf.int64,
-                ragged_rank=1,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [1, 2],
-            #    [3, 4],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [5, 6],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=np.asarray([[1, 2], [3, 4], [5, 6]]),
-                row_splits=np.asarray([0, 2, 2, 2, 3])),
+        record_batch=pa.RecordBatch.from_arrays(
+            [pa.array([[1, 2, 3, 4], None, [], [5, 6]], type=pa.list_(pa.int64()))],
+            ["value"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, 2]),
+            tf.int64,
+            ragged_rank=1,
+            row_splits_dtype=tf.int64,
         ),
-        dict(
-            testcase_name="RaggedRank1Uniform2D",
-            tensor_representation_textpb="""
+        # expected: [
+        #  [
+        #    [1, 2],
+        #    [3, 4],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [5, 6],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=np.asarray([[1, 2], [3, 4], [5, 6]]),
+            row_splits=np.asarray([0, 2, 2, 2, 3]),
+        ),
+    ),
+    dict(
+        testcase_name="RaggedRank1Uniform2D",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path { step: "value" }
           partition { uniform_row_length: 2 }
@@ -663,71 +728,84 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[1, 1, 2, 2, 3, 3, 4, 4], None, [], [5, 5, 6, 6]],
-                         type=pa.list_(pa.int64()))
-            ], ["value"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, 2, 2]),
-                tf.int64,
-                ragged_rank=1,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [[1, 1], [2, 2]],
-            #    [[3, 3], [4, 4]],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [[5, 5], [6, 6]],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=np.asarray([[[1, 1], [2, 2]], [[3, 3], [4, 4]],
-                                   [[5, 5], [6, 6]]]),
-                row_splits=np.asarray([0, 2, 2, 2, 3])),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[1, 1, 2, 2, 3, 3, 4, 4], None, [], [5, 5, 6, 6]],
+                    type=pa.list_(pa.int64()),
+                )
+            ],
+            ["value"],
         ),
-        dict(
-            testcase_name="RaggedRank2",
-            tensor_representation_textpb="""
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, 2, 2]),
+            tf.int64,
+            ragged_rank=1,
+            row_splits_dtype=tf.int64,
+        ),
+        # expected: [
+        #  [
+        #    [[1, 1], [2, 2]],
+        #    [[3, 3], [4, 4]],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [[5, 5], [6, 6]],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=np.asarray([[[1, 1], [2, 2]], [[3, 3], [4, 4]], [[5, 5], [6, 6]]]),
+            row_splits=np.asarray([0, 2, 2, 2, 3]),
+        ),
+    ),
+    dict(
+        testcase_name="RaggedRank2",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path { step: "value" }
           partition { row_length: "row_length" }
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[1, 2, 3, 4, 5, 6], None, [], [5, 6]],
-                         type=pa.list_(pa.int64())),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[1, 2, 3, 4, 5, 6], None, [], [5, 6]], type=pa.list_(pa.int64())
+                ),
                 pa.array([[4, 2], None, [], [1, 1]], type=pa.list_(pa.int64())),
-            ], ["value", "row_length"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None]),
-                tf.int64,
-                ragged_rank=2,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [1, 2, 3, 4],
-            #    [5, 6],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [5],
-            #    [6],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=tf.compat.v1.ragged.RaggedTensorValue(
-                    values=np.asarray([1, 2, 3, 4, 5, 6, 5, 6]),
-                    row_splits=np.asarray([0, 4, 6, 7, 8])),
-                row_splits=np.asarray([0, 2, 2, 2, 4])),
+            ],
+            ["value", "row_length"],
         ),
-        dict(
-            testcase_name="RaggedRank2Uniform1D",
-            tensor_representation_textpb="""
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None]),
+            tf.int64,
+            ragged_rank=2,
+            row_splits_dtype=tf.int64,
+        ),
+        # expected: [
+        #  [
+        #    [1, 2, 3, 4],
+        #    [5, 6],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [5],
+        #    [6],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
+                values=np.asarray([1, 2, 3, 4, 5, 6, 5, 6]),
+                row_splits=np.asarray([0, 4, 6, 7, 8]),
+            ),
+            row_splits=np.asarray([0, 2, 2, 2, 4]),
+        ),
+    ),
+    dict(
+        testcase_name="RaggedRank2Uniform1D",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path { step: "value" }
           partition { row_length: "row_length" }
@@ -735,43 +813,50 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[1, 2, 3, 4, 5, 6], None, [], [5, 6]],
-                         type=pa.list_(pa.int64())),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[1, 2, 3, 4, 5, 6], None, [], [5, 6]], type=pa.list_(pa.int64())
+                ),
                 pa.array([[2, 1], None, [], [1]], type=pa.list_(pa.int64())),
-            ], ["value", "row_length"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None, 2]),
-                tf.int64,
-                ragged_rank=2,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [
-            #      [1, 2],
-            #      [3, 4],
-            #    ],
-            #    [
-            #      [5, 6],
-            #    ],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [
-            #      [5, 6],
-            #    ],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=tf.compat.v1.ragged.RaggedTensorValue(
-                    values=np.asarray([[1, 2], [3, 4], [5, 6], [5, 6]]),
-                    row_splits=np.asarray([0, 2, 3, 4])),
-                row_splits=np.asarray([0, 2, 2, 2, 3])),
+            ],
+            ["value", "row_length"],
         ),
-        dict(
-            testcase_name="RaggedRank3UniformRaggedUniform",
-            tensor_representation_textpb="""
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None, 2]),
+            tf.int64,
+            ragged_rank=2,
+            row_splits_dtype=tf.int64,
+        ),
+        # expected: [
+        #  [
+        #    [
+        #      [1, 2],
+        #      [3, 4],
+        #    ],
+        #    [
+        #      [5, 6],
+        #    ],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [
+        #      [5, 6],
+        #    ],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
+                values=np.asarray([[1, 2], [3, 4], [5, 6], [5, 6]]),
+                row_splits=np.asarray([0, 2, 3, 4]),
+            ),
+            row_splits=np.asarray([0, 2, 2, 2, 3]),
+        ),
+    ),
+    dict(
+        testcase_name="RaggedRank3UniformRaggedUniform",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path { step: "value" }
           partition { uniform_row_length: 2 }
@@ -780,53 +865,61 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[1, 2, 3, 4, 5, 6], None, [], [5, 6, 7, 8]],
-                         type=pa.list_(pa.int64())),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[1, 2, 3, 4, 5, 6], None, [], [5, 6, 7, 8]],
+                    type=pa.list_(pa.int64()),
+                ),
                 pa.array([[2, 1], None, [], [1, 1]], type=pa.list_(pa.int64())),
-            ], ["value", "row_length"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None, None, 2]),
-                tf.int64,
-                ragged_rank=3,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [
-            #      [
-            #        [1, 2],
-            #        [3, 4],
-            #      ],
-            #      [
-            #        [5, 6],
-            #      ],
-            #    ],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [
-            #      [
-            #        [5, 6],
-            #      ],
-            #      [
-            #        [7, 8],
-            #      ],
-            #    ],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            ],
+            ["value", "row_length"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None, None, 2]),
+            tf.int64,
+            ragged_rank=3,
+            row_splits_dtype=tf.int64,
+        ),
+        # expected: [
+        #  [
+        #    [
+        #      [
+        #        [1, 2],
+        #        [3, 4],
+        #      ],
+        #      [
+        #        [5, 6],
+        #      ],
+        #    ],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [
+        #      [
+        #        [5, 6],
+        #      ],
+        #      [
+        #        [7, 8],
+        #      ],
+        #    ],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
                 values=tf.compat.v1.ragged.RaggedTensorValue(
-                    values=tf.compat.v1.ragged.RaggedTensorValue(
-                        values=np.asarray([[1, 2], [3, 4], [5, 6], [5, 6],
-                                           [7, 8]]),
-                        row_splits=np.asarray([0, 2, 3, 4, 5])),
-                    row_splits=np.asarray([0, 2, 4])),
-                row_splits=np.asarray([0, 1, 1, 1, 2])),
+                    values=np.asarray([[1, 2], [3, 4], [5, 6], [5, 6], [7, 8]]),
+                    row_splits=np.asarray([0, 2, 3, 4, 5]),
+                ),
+                row_splits=np.asarray([0, 2, 4]),
+            ),
+            row_splits=np.asarray([0, 1, 1, 1, 2]),
         ),
-        dict(
-            testcase_name="RaggedRank4RaggedUniformRagged",
-            tensor_representation_textpb="""
+    ),
+    dict(
+        testcase_name="RaggedRank4RaggedUniformRagged",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path { step: "value" }
           partition { row_length: "row_length_2" }
@@ -836,60 +929,69 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([[1, 2, 3, 4, 5, 6], None, [], [5, 6, 7, 8]],
-                         type=pa.list_(pa.int64())),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[1, 2, 3, 4, 5, 6], None, [], [5, 6, 7, 8]],
+                    type=pa.list_(pa.int64()),
+                ),
                 pa.array([[2, 1], None, [], [1, 1]], type=pa.list_(pa.int64())),
                 pa.array([[1], None, [], [1]], type=pa.list_(pa.int64())),
-            ], ["value", "row_length", "row_length_2"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None, None, None, 2]),
-                tf.int64,
-                ragged_rank=4,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [
-            #      [
-            #        [
-            #          [1, 2],
-            #          [3, 4],
-            #        ],
-            #        [
-            #          [5, 6],
-            #        ],
-            #      ],
-            #    ],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [
-            #      [
-            #        [
-            #          [5, 6],
-            #        ],
-            #        [
-            #          [7, 8],
-            #        ],
-            #      ],
-            #    ],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            ],
+            ["value", "row_length", "row_length_2"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None, None, None, 2]),
+            tf.int64,
+            ragged_rank=4,
+            row_splits_dtype=tf.int64,
+        ),
+        # expected: [
+        #  [
+        #    [
+        #      [
+        #        [
+        #          [1, 2],
+        #          [3, 4],
+        #        ],
+        #        [
+        #          [5, 6],
+        #        ],
+        #      ],
+        #    ],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [
+        #      [
+        #        [
+        #          [5, 6],
+        #        ],
+        #        [
+        #          [7, 8],
+        #        ],
+        #      ],
+        #    ],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
                 values=tf.compat.v1.ragged.RaggedTensorValue(
                     values=tf.compat.v1.ragged.RaggedTensorValue(
-                        values=tf.compat.v1.ragged.RaggedTensorValue(
-                            values=np.asarray([[1, 2], [3, 4], [5, 6], [5, 6],
-                                               [7, 8]]),
-                            row_splits=np.asarray([0, 2, 3, 4, 5])),
-                        row_splits=np.asarray([0, 2, 4])),
-                    row_splits=np.asarray([0, 1, 2])),
-                row_splits=np.asarray([0, 1, 1, 1, 2])),
+                        values=np.asarray([[1, 2], [3, 4], [5, 6], [5, 6], [7, 8]]),
+                        row_splits=np.asarray([0, 2, 3, 4, 5]),
+                    ),
+                    row_splits=np.asarray([0, 2, 4]),
+                ),
+                row_splits=np.asarray([0, 1, 2]),
+            ),
+            row_splits=np.asarray([0, 1, 1, 1, 2]),
         ),
-        dict(
-            testcase_name="Struct_RaggedRank1Uniform1D",
-            tensor_representation_textpb="""
+    ),
+    dict(
+        testcase_name="Struct_RaggedRank1Uniform1D",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "parent"
@@ -899,35 +1001,44 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.StructArray.from_arrays([
-                    pa.array([[1, 2, 3, 4], None, [], [5, 6]],
-                             type=pa.list_(pa.int64()))
-                ], ["value"])
-            ], ["parent"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, 2]),
-                tf.int64,
-                ragged_rank=1,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [1, 2],
-            #    [3, 4],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [5, 6],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=np.asarray([[1, 2], [3, 4], [5, 6]]),
-                row_splits=np.asarray([0, 2, 2, 2, 3])),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [[1, 2, 3, 4], None, [], [5, 6]], type=pa.list_(pa.int64())
+                        )
+                    ],
+                    ["value"],
+                )
+            ],
+            ["parent"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, 2]),
+            tf.int64,
+            ragged_rank=1,
+            row_splits_dtype=tf.int64,
         ),
-        dict(
-            testcase_name="Struct_RaggedRank2",
-            tensor_representation_textpb="""
+        # expected: [
+        #  [
+        #    [1, 2],
+        #    [3, 4],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [5, 6],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=np.asarray([[1, 2], [3, 4], [5, 6]]),
+            row_splits=np.asarray([0, 2, 2, 2, 3]),
+        ),
+    ),
+    dict(
+        testcase_name="Struct_RaggedRank2",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "parent"
@@ -937,39 +1048,50 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.StructArray.from_arrays([
-                    pa.array([[1, 2, 3, 4, 5, 6], None, [], [5, 6]],
-                             type=pa.list_(pa.int64())),
-                    pa.array([[4, 2], None, [], [1, 1]], type=pa.list_(pa.int64())),
-                ], ["value", "row_length"])
-            ], ["parent"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None]),
-                tf.int64,
-                ragged_rank=2,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [1, 2, 3, 4],
-            #    [5, 6],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [5],
-            #    [6],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
-                values=tf.compat.v1.ragged.RaggedTensorValue(
-                    values=np.asarray([1, 2, 3, 4, 5, 6, 5, 6]),
-                    row_splits=np.asarray([0, 4, 6, 7, 8])),
-                row_splits=np.asarray([0, 2, 2, 2, 4])),
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [[1, 2, 3, 4, 5, 6], None, [], [5, 6]],
+                            type=pa.list_(pa.int64()),
+                        ),
+                        pa.array([[4, 2], None, [], [1, 1]], type=pa.list_(pa.int64())),
+                    ],
+                    ["value", "row_length"],
+                )
+            ],
+            ["parent"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None]),
+            tf.int64,
+            ragged_rank=2,
+            row_splits_dtype=tf.int64,
+        ),
+        # expected: [
+        #  [
+        #    [1, 2, 3, 4],
+        #    [5, 6],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [5],
+        #    [6],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
+                values=np.asarray([1, 2, 3, 4, 5, 6, 5, 6]),
+                row_splits=np.asarray([0, 4, 6, 7, 8]),
+            ),
+            row_splits=np.asarray([0, 2, 2, 2, 4]),
         ),
-        dict(
-            testcase_name="Struct_RaggedRank4RaggedUniformRagged",
-            tensor_representation_textpb="""
+    ),
+    dict(
+        testcase_name="Struct_RaggedRank4RaggedUniformRagged",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "parent"
@@ -982,62 +1104,74 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.StructArray.from_arrays([
-                    pa.array([[1, 2, 3, 4, 5, 6], None, [], [5, 6, 7, 8]],
-                             type=pa.list_(pa.int64())),
-                    pa.array([[2, 1], None, [], [1, 1]], type=pa.list_(pa.int64())),
-                    pa.array([[1], None, [], [1]], type=pa.list_(pa.int64())),
-                ], ["value", "row_length", "row_length_2"]),
-            ], ["parent"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None, None, None, 2]),
-                tf.int64,
-                ragged_rank=4,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [
-            #      [
-            #        [
-            #          [1, 2],
-            #          [3, 4],
-            #        ],
-            #        [
-            #          [5, 6],
-            #        ],
-            #      ],
-            #    ],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [
-            #      [
-            #        [
-            #          [5, 6],
-            #        ],
-            #        [
-            #          [7, 8],
-            #        ],
-            #      ],
-            #    ],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array(
+                            [[1, 2, 3, 4, 5, 6], None, [], [5, 6, 7, 8]],
+                            type=pa.list_(pa.int64()),
+                        ),
+                        pa.array([[2, 1], None, [], [1, 1]], type=pa.list_(pa.int64())),
+                        pa.array([[1], None, [], [1]], type=pa.list_(pa.int64())),
+                    ],
+                    ["value", "row_length", "row_length_2"],
+                ),
+            ],
+            ["parent"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None, None, None, 2]),
+            tf.int64,
+            ragged_rank=4,
+            row_splits_dtype=tf.int64,
+        ),
+        # expected: [
+        #  [
+        #    [
+        #      [
+        #        [
+        #          [1, 2],
+        #          [3, 4],
+        #        ],
+        #        [
+        #          [5, 6],
+        #        ],
+        #      ],
+        #    ],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [
+        #      [
+        #        [
+        #          [5, 6],
+        #        ],
+        #        [
+        #          [7, 8],
+        #        ],
+        #      ],
+        #    ],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
                 values=tf.compat.v1.ragged.RaggedTensorValue(
                     values=tf.compat.v1.ragged.RaggedTensorValue(
-                        values=tf.compat.v1.ragged.RaggedTensorValue(
-                            values=np.asarray([[1, 2], [3, 4], [5, 6], [5, 6],
-                                               [7, 8]]),
-                            row_splits=np.asarray([0, 2, 3, 4, 5])),
-                        row_splits=np.asarray([0, 2, 4])),
-                    row_splits=np.asarray([0, 1, 2])),
-                row_splits=np.asarray([0, 1, 1, 1, 2])),
+                        values=np.asarray([[1, 2], [3, 4], [5, 6], [5, 6], [7, 8]]),
+                        row_splits=np.asarray([0, 2, 3, 4, 5]),
+                    ),
+                    row_splits=np.asarray([0, 2, 4]),
+                ),
+                row_splits=np.asarray([0, 1, 2]),
+            ),
+            row_splits=np.asarray([0, 1, 1, 1, 2]),
         ),
-        dict(
-            testcase_name="ListStruct_RaggedRank1Uniform1D",
-            tensor_representation_textpb="""
+    ),
+    dict(
+        testcase_name="ListStruct_RaggedRank1Uniform1D",
+        tensor_representation_textpb="""
         ragged_tensor {
           feature_path {
             step: "parent"
@@ -1048,81 +1182,99 @@ def _MakeRaggedTensorDTypesTestCases():
           row_partition_dtype: INT64
         }
         """,
-            record_batch=pa.RecordBatch.from_arrays([
-                pa.array([
-                    [
-                        {
-                            "struct": {
-                                "value": [1, 2, 3],
-                                "row_length": [2, 1],
-                            }
-                        },
-                        {
-                            "struct": {
-                                "value": [1],
-                                "row_length": [1],
-                            }
-                        },
-                    ],
-                    None,
-                    [],
+        record_batch=pa.RecordBatch.from_arrays(
+            [
+                pa.array(
                     [
-                        {
-                            "struct": {
-                                "value": [2, 3, 4],
-                                "row_length": [1, 2],
-                            }
-                        },
+                        [
+                            {
+                                "struct": {
+                                    "value": [1, 2, 3],
+                                    "row_length": [2, 1],
+                                }
+                            },
+                            {
+                                "struct": {
+                                    "value": [1],
+                                    "row_length": [1],
+                                }
+                            },
+                        ],
+                        None,
+                        [],
+                        [
+                            {
+                                "struct": {
+                                    "value": [2, 3, 4],
+                                    "row_length": [1, 2],
+                                }
+                            },
+                        ],
                     ],
-                ],
-                         pa.list_(
-                             pa.struct([("struct",
-                                         pa.struct([
-                                             ("value", pa.list_(pa.int64())),
-                                             ("row_length", pa.list_(pa.int64()))
-                                         ]))])))
-            ], ["parent"]),
-            expected_type_spec=tf.RaggedTensorSpec(
-                tf.TensorShape([None, None, None, None]),
-                tf.int64,
-                ragged_rank=3,
-                row_splits_dtype=tf.int64),
-            # expected: [
-            #  [
-            #    [
-            #      [1, 2],
-            #      [3],
-            #    ],
-            #    [
-            #      [1],
-            #    ],
-            #  ],
-            #  [],
-            #  [],
-            #  [
-            #    [
-            #      [2],
-            #      [3, 4],
-            #    ],
-            #  ],
-            # ]
-            expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+                    pa.list_(
+                        pa.struct(
+                            [
+                                (
+                                    "struct",
+                                    pa.struct(
+                                        [
+                                            ("value", pa.list_(pa.int64())),
+                                            ("row_length", pa.list_(pa.int64())),
+                                        ]
+                                    ),
+                                )
+                            ]
+                        )
+                    ),
+                )
+            ],
+            ["parent"],
+        ),
+        expected_type_spec=tf.RaggedTensorSpec(
+            tf.TensorShape([None, None, None, None]),
+            tf.int64,
+            ragged_rank=3,
+            row_splits_dtype=tf.int64,
+        ),
+        # expected: [
+        #  [
+        #    [
+        #      [1, 2],
+        #      [3],
+        #    ],
+        #    [
+        #      [1],
+        #    ],
+        #  ],
+        #  [],
+        #  [],
+        #  [
+        #    [
+        #      [2],
+        #      [3, 4],
+        #    ],
+        #  ],
+        # ]
+        expected_ragged_tensor=tf.compat.v1.ragged.RaggedTensorValue(
+            values=tf.compat.v1.ragged.RaggedTensorValue(
                 values=tf.compat.v1.ragged.RaggedTensorValue(
-                    values=tf.compat.v1.ragged.RaggedTensorValue(
-                        values=np.asarray([1, 2, 3, 1, 2, 3, 4]),
-                        row_splits=np.asarray([0, 2, 3, 4, 5, 7])),
-                    row_splits=np.asarray([0, 2, 3, 5])),
-                row_splits=np.asarray([0, 2, 2, 2, 3]),
+                    values=np.asarray([1, 2, 3, 1, 2, 3, 4]),
+                    row_splits=np.asarray([0, 2, 3, 4, 5, 7]),
+                ),
+                row_splits=np.asarray([0, 2, 3, 5]),
             ),
+            row_splits=np.asarray([0, 2, 2, 2, 3]),
         ),
-    ])
+    ),
+]
 
 _INVALID_DEFAULT_VALUE_TEST_CASES = [
     dict(
         testcase_name="default_value_not_set",
         value_type=pa.int64(),
         default_value_pbtxt="",
-        exception_regexp="Incompatible default value"),
+        exception_regexp="Incompatible default value",
+    ),
     dict(
         testcase_name="mismatch_type",
         value_type=pa.binary(),
@@ -1163,7 +1315,8 @@ def _MakeRaggedTensorDTypesTestCases():
         arrow_schema={
             "key": pa.list_(pa.int64()),
             "value": pa.list_(pa.int64()),
-        }),
+        },
+    ),
     dict(
         testcase_name="invalid_index_column_type",
         tensor_representation_textpb="""
@@ -1180,37 +1333,44 @@ def _MakeRaggedTensorDTypesTestCases():
         arrow_schema={
             "key": pa.list_(pa.float32()),
             "value": pa.list_(pa.int64()),
-        }),
+        },
+    ),
 ]
 
 
 class TensorAdapterTest(parameterized.TestCase, tf.test.TestCase):
-
-  def assertSparseAllEqual(self, a, b):
-    self.assertAllEqual(a.indices, b.indices)
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.dense_shape, b.dense_shape)
-
-  def assertRaggedAllEqual(self, a, b):
-    self.assertAllEqual(a.values, b.values)
-    self.assertAllEqual(a.row_splits, b.row_splits)
-
-  def assertNonEager(self, v):
-    """Asserts `v` is not a Eager tensor."""
-    self.assertIsInstance(v, (tf.compat.v1.ragged.RaggedTensorValue,
-                              tf.compat.v1.SparseTensorValue, np.ndarray))
-
-  def assertAdapterCanProduceNonEagerInEagerMode(self, adapter, record_batch):
-    if tf.executing_eagerly():
-      converted_non_eager = adapter.ToBatchTensors(
-          record_batch, produce_eager_tensors=False)
-      for v in converted_non_eager.values():
-        self.assertNonEager(v)
-
-  @test_util.deprecated_graph_mode_only
-  def testRaiseOnRequestingEagerTensorsInGraphMode(self):
-    tensor_representation = text_format.Parse(
-        """
+    def assertSparseAllEqual(self, a, b):
+        self.assertAllEqual(a.indices, b.indices)
+        self.assertAllEqual(a.values, b.values)
+        self.assertAllEqual(a.dense_shape, b.dense_shape)
+
+    def assertRaggedAllEqual(self, a, b):
+        self.assertAllEqual(a.values, b.values)
+        self.assertAllEqual(a.row_splits, b.row_splits)
+
+    def assertNonEager(self, v):
+        """Asserts `v` is not a Eager tensor."""
+        self.assertIsInstance(
+            v,
+            (
+                tf.compat.v1.ragged.RaggedTensorValue,
+                tf.compat.v1.SparseTensorValue,
+                np.ndarray,
+            ),
+        )
+
+    def assertAdapterCanProduceNonEagerInEagerMode(self, adapter, record_batch):
+        if tf.executing_eagerly():
+            converted_non_eager = adapter.ToBatchTensors(
+                record_batch, produce_eager_tensors=False
+            )
+            for v in converted_non_eager.values():
+                self.assertNonEager(v)
+
+    @test_util.deprecated_graph_mode_only
+    def testRaiseOnRequestingEagerTensorsInGraphMode(self):
+        tensor_representation = text_format.Parse(
+            """
   sparse_tensor {
     index_column_names: ["key"]
     value_column_name: "value"
@@ -1220,136 +1380,169 @@ def testRaiseOnRequestingEagerTensorsInGraphMode(self):
       }
     }
   }
-  """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays(
-        [pa.array([[1]]), pa.array([[2]])], ["key", "value"])
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                           {"output": tensor_representation}))
-    with self.assertRaisesRegex(RuntimeError, "eager mode was not enabled"):
-      adapter.ToBatchTensors(record_batch, produce_eager_tensors=True)
-
-  @parameterized.named_parameters(*_ONE_TENSOR_TEST_CASES)
-  @test_util.run_in_graph_and_eager_modes
-  def testOneTensorFromOneColumn(self, tensor_representation_textpb,
-                                 arrow_array, expected_type_spec,
-                                 expected_output):
-
-    tensor_representation = text_format.Parse(tensor_representation_textpb,
-                                              schema_pb2.TensorRepresentation())
-    column_name = None
-    if tensor_representation.HasField("dense_tensor"):
-      column_name = tensor_representation.dense_tensor.column_name
-    if tensor_representation.HasField("varlen_sparse_tensor"):
-      column_name = tensor_representation.varlen_sparse_tensor.column_name
-
-    record_batch = pa.RecordBatch.from_arrays([arrow_array], [column_name])
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                           {"output": tensor_representation}))
-    self.assertEqual(expected_type_spec, adapter.TypeSpecs()["output"])
-    converted = adapter.ToBatchTensors(record_batch)
-    self.assertLen(converted, 1)
-    self.assertIn("output", converted)
-    actual_output = converted["output"]
-    if tf.executing_eagerly():
-      self.assertTrue(
-          expected_type_spec.is_compatible_with(actual_output),
-          "{} is not compatible with spec {}".format(actual_output,
-                                                     expected_type_spec))
-    if isinstance(expected_output,
-                  (tf.SparseTensor, tf.compat.v1.SparseTensorValue)):
-      self.assertIsInstance(actual_output,
-                            (tf.SparseTensor, tf.compat.v1.SparseTensorValue))
-      self.assertSparseAllEqual(expected_output, actual_output)
-    else:
-      self.assertAllEqual(expected_output, actual_output)
-
-    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
-
-  @test_util.run_in_graph_and_eager_modes
-  def test0DSparseTensor(self):
-    values = [[1.0], None, [], [3.0]]
-    expected_values = [1.0, 3.0]
-    indices = [[99], None, [], [8]]
-
-    if tf.executing_eagerly():
-      expected_output = tf.sparse.SparseTensor(
-          indices=[[0], [3]],
-          values=tf.constant(expected_values, dtype=tf.float32),
-          dense_shape=(4,))
-    else:
-      expected_output = tf.compat.v1.SparseTensorValue(
-          indices=[[0], [3]],
-          values=np.array(expected_values, np.float32),
-          dense_shape=(4,))
-
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.array(indices, type=("list", pa.list_)[1](pa.int64())),
-        pa.array(values, type=("list", pa.list_)[1](pa.float32()))
-    ], ["key", "value"])
-    tensor_representation = text_format.Parse(
-        """
+  """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [pa.array([[1]]), pa.array([[2]])], ["key", "value"]
+        )
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema, {"output": tensor_representation}
+            )
+        )
+        with self.assertRaisesRegex(RuntimeError, "eager mode was not enabled"):
+            adapter.ToBatchTensors(record_batch, produce_eager_tensors=True)
+
+    @parameterized.named_parameters(*_ONE_TENSOR_TEST_CASES)
+    @test_util.run_in_graph_and_eager_modes
+    def testOneTensorFromOneColumn(
+        self,
+        tensor_representation_textpb,
+        arrow_array,
+        expected_type_spec,
+        expected_output,
+    ):
+        tensor_representation = text_format.Parse(
+            tensor_representation_textpb, schema_pb2.TensorRepresentation()
+        )
+        column_name = None
+        if tensor_representation.HasField("dense_tensor"):
+            column_name = tensor_representation.dense_tensor.column_name
+        if tensor_representation.HasField("varlen_sparse_tensor"):
+            column_name = tensor_representation.varlen_sparse_tensor.column_name
+
+        record_batch = pa.RecordBatch.from_arrays([arrow_array], [column_name])
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema, {"output": tensor_representation}
+            )
+        )
+        self.assertEqual(expected_type_spec, adapter.TypeSpecs()["output"])
+        converted = adapter.ToBatchTensors(record_batch)
+        self.assertLen(converted, 1)
+        self.assertIn("output", converted)
+        actual_output = converted["output"]
+        if tf.executing_eagerly():
+            self.assertTrue(
+                expected_type_spec.is_compatible_with(actual_output),
+                f"{actual_output} is not compatible with spec {expected_type_spec}",
+            )
+        if isinstance(
+            expected_output, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
+        ):
+            self.assertIsInstance(
+                actual_output, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
+            )
+            self.assertSparseAllEqual(expected_output, actual_output)
+        else:
+            self.assertAllEqual(expected_output, actual_output)
+
+        self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
+
+    @test_util.run_in_graph_and_eager_modes
+    def test0DSparseTensor(self):
+        values = [[1.0], None, [], [3.0]]
+        expected_values = [1.0, 3.0]
+        indices = [[99], None, [], [8]]
+
+        if tf.executing_eagerly():
+            expected_output = tf.sparse.SparseTensor(
+                indices=[[0], [3]],
+                values=tf.constant(expected_values, dtype=tf.float32),
+                dense_shape=(4,),
+            )
+        else:
+            expected_output = tf.compat.v1.SparseTensorValue(
+                indices=[[0], [3]],
+                values=np.array(expected_values, np.float32),
+                dense_shape=(4,),
+            )
+
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array(indices, type=("list", pa.list_)[1](pa.int64())),
+                pa.array(values, type=("list", pa.list_)[1](pa.float32())),
+            ],
+            ["key", "value"],
+        )
+        tensor_representation = text_format.Parse(
+            """
               sparse_tensor {
                 value_column_name: "value"
                 dense_shape { }
               }
-            """, schema_pb2.TensorRepresentation())
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                           {"output": tensor_representation}))
-
-    expected_type_spec = tf.SparseTensorSpec([None], tf.float32)
-
-    converted = adapter.ToBatchTensors(record_batch)
-    self.assertLen(converted, 1)
-    self.assertIn("output", converted)
-    actual_output = converted["output"]
-    self.assertIsInstance(actual_output,
-                          (tf.SparseTensor, tf.compat.v1.SparseTensorValue))
-    if tf.executing_eagerly():
-      self.assertTrue(
-          expected_type_spec.is_compatible_with(actual_output),
-          "{} is not compatible with spec {}".format(actual_output,
-                                                     expected_type_spec))
-
-    self.assertSparseAllEqual(expected_output, actual_output)
-
-    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
-
-  @parameterized.named_parameters(*_Make1DSparseTensorTestCases())
-  @test_util.run_in_graph_and_eager_modes
-  def test1DSparseTensor(self, tensor_representation_textpb, record_batch,
-                         expected_type_spec, expected_output):
-    tensor_representation = text_format.Parse(tensor_representation_textpb,
-                                              schema_pb2.TensorRepresentation())
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                           {"output": tensor_representation}))
-    converted = adapter.ToBatchTensors(record_batch)
-    self.assertLen(converted, 1)
-    self.assertIn("output", converted)
-    actual_output = converted["output"]
-    self.assertIsInstance(actual_output,
-                          (tf.SparseTensor, tf.compat.v1.SparseTensorValue))
-    if tf.executing_eagerly():
-      self.assertTrue(
-          expected_type_spec.is_compatible_with(actual_output),
-          "{} is not compatible with spec {}".format(actual_output,
-                                                     expected_type_spec))
-
-    self.assertSparseAllEqual(expected_output, actual_output)
-
-    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
-
-  @parameterized.named_parameters(
-      dict(testcase_name="known_dense_shape", is_dense_shape_known=True),
-      dict(testcase_name="unknown_dense_shape", is_dense_shape_known=False))
-  @test_util.run_in_graph_and_eager_modes
-  def test2DSparseTensor(self, is_dense_shape_known):
-    dense_shape = [10, 20] if is_dense_shape_known else [-1, -1]
-    tensor_representation = text_format.Parse(
-        f"""
+            """,
+            schema_pb2.TensorRepresentation(),
+        )
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema, {"output": tensor_representation}
+            )
+        )
+
+        expected_type_spec = tf.SparseTensorSpec([None], tf.float32)
+
+        converted = adapter.ToBatchTensors(record_batch)
+        self.assertLen(converted, 1)
+        self.assertIn("output", converted)
+        actual_output = converted["output"]
+        self.assertIsInstance(
+            actual_output, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
+        )
+        if tf.executing_eagerly():
+            self.assertTrue(
+                expected_type_spec.is_compatible_with(actual_output),
+                f"{actual_output} is not compatible with spec {expected_type_spec}",
+            )
+
+        self.assertSparseAllEqual(expected_output, actual_output)
+
+        self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
+
+    @parameterized.named_parameters(*_Make1DSparseTensorTestCases())
+    @test_util.run_in_graph_and_eager_modes
+    def test1DSparseTensor(
+        self,
+        tensor_representation_textpb,
+        record_batch,
+        expected_type_spec,
+        expected_output,
+    ):
+        tensor_representation = text_format.Parse(
+            tensor_representation_textpb, schema_pb2.TensorRepresentation()
+        )
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema, {"output": tensor_representation}
+            )
+        )
+        converted = adapter.ToBatchTensors(record_batch)
+        self.assertLen(converted, 1)
+        self.assertIn("output", converted)
+        actual_output = converted["output"]
+        self.assertIsInstance(
+            actual_output, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
+        )
+        if tf.executing_eagerly():
+            self.assertTrue(
+                expected_type_spec.is_compatible_with(actual_output),
+                f"{actual_output} is not compatible with spec {expected_type_spec}",
+            )
+
+        self.assertSparseAllEqual(expected_output, actual_output)
+
+        self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
+
+    @parameterized.named_parameters(
+        dict(testcase_name="known_dense_shape", is_dense_shape_known=True),
+        dict(testcase_name="unknown_dense_shape", is_dense_shape_known=False),
+    )
+    @test_util.run_in_graph_and_eager_modes
+    def test2DSparseTensor(self, is_dense_shape_known):
+        dense_shape = [10, 20] if is_dense_shape_known else [-1, -1]
+        tensor_representation = text_format.Parse(
+            f"""
         sparse_tensor {{
           value_column_name: "values"
           index_column_names: ["d0", "d1"]
@@ -1362,40 +1555,46 @@ def test2DSparseTensor(self, is_dense_shape_known):
             }}
           }}
         }}
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays(
-        [
-            pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(
-                pa.int64())),
-            # Also test that the index column can be of an integral type other
-            # than int64.
-            pa.array([[9], None, [9], [7, 8, 9], []],
-                     type=pa.list_(pa.uint32())),
-            pa.array([[0], None, [0], [0, 1, 2], []], type=pa.list_(pa.int64()))
-        ],
-        ["values", "d0", "d1"])
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                           {"output": tensor_representation}))
-    converted = adapter.ToBatchTensors(record_batch)
-    self.assertLen(converted, 1)
-    self.assertIn("output", converted)
-    actual_output = converted["output"]
-    self.assertIsInstance(actual_output,
-                          (tf.SparseTensor, tf.compat.v1.SparseTensorValue))
-    self.assertSparseAllEqual(
-        tf.compat.v1.SparseTensorValue(
-            dense_shape=[5]+dense_shape,
-            indices=[[0, 9, 0], [2, 9, 0], [3, 7, 0], [3, 8, 1], [3, 9, 2]],
-            values=tf.convert_to_tensor([1, 2, 3, 4, 5], dtype=tf.int64)),
-        actual_output)
-
-    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testSparseTensorsReferSameColumns(self):
-    tensor_representation1 = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64())),
+                # Also test that the index column can be of an integral type other
+                # than int64.
+                pa.array([[9], None, [9], [7, 8, 9], []], type=pa.list_(pa.uint32())),
+                pa.array([[0], None, [0], [0, 1, 2], []], type=pa.list_(pa.int64())),
+            ],
+            ["values", "d0", "d1"],
+        )
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema, {"output": tensor_representation}
+            )
+        )
+        converted = adapter.ToBatchTensors(record_batch)
+        self.assertLen(converted, 1)
+        self.assertIn("output", converted)
+        actual_output = converted["output"]
+        self.assertIsInstance(
+            actual_output, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
+        )
+        self.assertSparseAllEqual(
+            tf.compat.v1.SparseTensorValue(
+                dense_shape=[5] + dense_shape,
+                indices=[[0, 9, 0], [2, 9, 0], [3, 7, 0], [3, 8, 1], [3, 9, 2]],
+                values=tf.convert_to_tensor([1, 2, 3, 4, 5], dtype=tf.int64),
+            ),
+            actual_output,
+        )
+
+        self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
+
+    @test_util.run_in_graph_and_eager_modes
+    def testSparseTensorsReferSameColumns(self):
+        tensor_representation1 = text_format.Parse(
+            """
         sparse_tensor {
           value_column_name: "values"
           index_column_names: ["d0", "d1"]
@@ -1408,9 +1607,11 @@ def testSparseTensorsReferSameColumns(self):
             }
           }
         }
-        """, schema_pb2.TensorRepresentation())
-    tensor_representation2 = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        tensor_representation2 = text_format.Parse(
+            """
         sparse_tensor {
           value_column_name: "d1"
           index_column_names: ["d0"]
@@ -1420,118 +1621,155 @@ def testSparseTensorsReferSameColumns(self):
             }
           }
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64())),
-        pa.array([[9], None, [9], [7, 8, 9], []], type=pa.list_(pa.int64())),
-        pa.array([[0], None, [0], [0, 1, 2], []], type=pa.list_(pa.int64()))
-    ], ["values", "d0", "d1"])
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema, {
-            "output1": tensor_representation1,
-            "output2": tensor_representation2
-        }))
-    converted = adapter.ToBatchTensors(record_batch)
-    self.assertLen(converted, 2)
-    self.assertIn("output1", converted)
-    self.assertIn("output2", converted)
-    actual_output1 = converted["output1"]
-    actual_output2 = converted["output2"]
-    self.assertIsInstance(actual_output1,
-                          (tf.SparseTensor, tf.compat.v1.SparseTensorValue))
-    self.assertSparseAllEqual(
-        tf.compat.v1.SparseTensorValue(
-            dense_shape=[5, 10, 20],
-            indices=[[0, 9, 0], [2, 9, 0], [3, 7, 0], [3, 8, 1], [3, 9, 2]],
-            values=tf.constant([1, 2, 3, 4, 5], dtype=tf.int64)),
-        actual_output1)
-    self.assertIsInstance(actual_output2,
-                          (tf.SparseTensor, tf.compat.v1.SparseTensorValue))
-    self.assertSparseAllEqual(
-        tf.compat.v1.SparseTensorValue(
-            dense_shape=[5, 10],
-            indices=[[0, 9], [2, 9], [3, 7], [3, 8], [3, 9]],
-            values=tf.constant([0, 0, 0, 1, 2], dtype=tf.int64)),
-        actual_output2)
-
-    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
-
-  @parameterized.named_parameters(_RAGGED_TENSOR_TEST_CASES)
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensor(self, tensor_representation_textpb, record_batch,
-                       expected_type_spec, expected_ragged_tensor):
-    tensor_representation = text_format.Parse(tensor_representation_textpb,
-                                              schema_pb2.TensorRepresentation())
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                           {"output": tensor_representation}))
-    converted = adapter.ToBatchTensors(record_batch)
-    self.assertLen(converted, 1)
-    self.assertIn("output", converted)
-    actual_output = converted["output"]
-    self.assertIsInstance(
-        actual_output, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue))
-    if tf.executing_eagerly():
-      self.assertEqual(adapter.TypeSpecs()["output"], expected_type_spec)
-      self.assertTrue(
-          expected_type_spec.is_compatible_with(actual_output),
-          "{} is not compatible with spec {}".format(actual_output,
-                                                     expected_type_spec))
-
-    print("actual:", actual_output)
-    print("expected:", expected_ragged_tensor)
-    self.assertRaggedAllEqual(actual_output, expected_ragged_tensor)
-    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensorFromStructArrayWithNoNestedness(self):
-    tensor_representation = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64())),
+                pa.array([[9], None, [9], [7, 8, 9], []], type=pa.list_(pa.int64())),
+                pa.array([[0], None, [0], [0, 1, 2], []], type=pa.list_(pa.int64())),
+            ],
+            ["values", "d0", "d1"],
+        )
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema,
+                {"output1": tensor_representation1, "output2": tensor_representation2},
+            )
+        )
+        converted = adapter.ToBatchTensors(record_batch)
+        self.assertLen(converted, 2)
+        self.assertIn("output1", converted)
+        self.assertIn("output2", converted)
+        actual_output1 = converted["output1"]
+        actual_output2 = converted["output2"]
+        self.assertIsInstance(
+            actual_output1, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
+        )
+        self.assertSparseAllEqual(
+            tf.compat.v1.SparseTensorValue(
+                dense_shape=[5, 10, 20],
+                indices=[[0, 9, 0], [2, 9, 0], [3, 7, 0], [3, 8, 1], [3, 9, 2]],
+                values=tf.constant([1, 2, 3, 4, 5], dtype=tf.int64),
+            ),
+            actual_output1,
+        )
+        self.assertIsInstance(
+            actual_output2, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)
+        )
+        self.assertSparseAllEqual(
+            tf.compat.v1.SparseTensorValue(
+                dense_shape=[5, 10],
+                indices=[[0, 9], [2, 9], [3, 7], [3, 8], [3, 9]],
+                values=tf.constant([0, 0, 0, 1, 2], dtype=tf.int64),
+            ),
+            actual_output2,
+        )
+
+        self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
+
+    @parameterized.named_parameters(_RAGGED_TENSOR_TEST_CASES)
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensor(
+        self,
+        tensor_representation_textpb,
+        record_batch,
+        expected_type_spec,
+        expected_ragged_tensor,
+    ):
+        tensor_representation = text_format.Parse(
+            tensor_representation_textpb, schema_pb2.TensorRepresentation()
+        )
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema, {"output": tensor_representation}
+            )
+        )
+        converted = adapter.ToBatchTensors(record_batch)
+        self.assertLen(converted, 1)
+        self.assertIn("output", converted)
+        actual_output = converted["output"]
+        self.assertIsInstance(
+            actual_output, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)
+        )
+        if tf.executing_eagerly():
+            self.assertEqual(adapter.TypeSpecs()["output"], expected_type_spec)
+            self.assertTrue(
+                expected_type_spec.is_compatible_with(actual_output),
+                f"{actual_output} is not compatible with spec {expected_type_spec}",
+            )
+
+        print("actual:", actual_output)
+        print("expected:", expected_ragged_tensor)
+        self.assertRaggedAllEqual(actual_output, expected_ragged_tensor)
+        self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
+
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensorFromStructArrayWithNoNestedness(self):
+        tensor_representation = text_format.Parse(
+            """
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
             step: "inner_feature"
           }
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.StructArray.from_arrays(
-            [pa.array([1, 2, 3]),
-             pa.array(["a", "b", "c"])], ["inner_feature", "x2"])
-    ], ["ragged_feature"])
-    with self.assertRaisesRegex(ValueError,
-                                ".*Unable to handle tensor output.*"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                             {"output": tensor_representation}))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensorStructTypeInvalidSteps(self):
-    tensor_representation = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [pa.array([1, 2, 3]), pa.array(["a", "b", "c"])],
+                    ["inner_feature", "x2"],
+                )
+            ],
+            ["ragged_feature"],
+        )
+        with self.assertRaisesRegex(ValueError, ".*Unable to handle tensor output.*"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    record_batch.schema, {"output": tensor_representation}
+                )
+            )
+
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensorStructTypeInvalidSteps(self):
+        tensor_representation = text_format.Parse(
+            """
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
             step: "wrong_step"
           }
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.StructArray.from_arrays([
-            pa.array([[1, 2, 3]], pa.list_(pa.int64())),
-            pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
-        ], ["inner_feature", "x2"])
-    ], ["ragged_feature"])
-    with self.assertRaisesRegex(ValueError,
-                                ".*Unable to handle tensor output.*"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                             {"output": tensor_representation}))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensorStructTypeTooManySteps(self):
-    tensor_representation = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array([[1, 2, 3]], pa.list_(pa.int64())),
+                        pa.array([["a", "b", "c"]], pa.list_(pa.binary())),
+                    ],
+                    ["inner_feature", "x2"],
+                )
+            ],
+            ["ragged_feature"],
+        )
+        with self.assertRaisesRegex(ValueError, ".*Unable to handle tensor output.*"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    record_batch.schema, {"output": tensor_representation}
+                )
+            )
+
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensorStructTypeTooManySteps(self):
+        tensor_representation = text_format.Parse(
+            """
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -1539,45 +1777,63 @@ def testRaggedTensorStructTypeTooManySteps(self):
             step: "non_existant_feature"
           }
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.StructArray.from_arrays([
-            pa.array([[1, 2, 3]], pa.list_(pa.int64())),
-            pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
-        ], ["inner_feature", "x2"])
-    ], ["ragged_feature"])
-    with self.assertRaisesRegex(ValueError,
-                                ".*Unable to handle tensor output.*"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                             {"output": tensor_representation}))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensorStructTypeNonLeaf(self):
-    tensor_representation = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array([[1, 2, 3]], pa.list_(pa.int64())),
+                        pa.array([["a", "b", "c"]], pa.list_(pa.binary())),
+                    ],
+                    ["inner_feature", "x2"],
+                )
+            ],
+            ["ragged_feature"],
+        )
+        with self.assertRaisesRegex(ValueError, ".*Unable to handle tensor output.*"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    record_batch.schema, {"output": tensor_representation}
+                )
+            )
+
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensorStructTypeNonLeaf(self):
+        tensor_representation = text_format.Parse(
+            """
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
           }
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.StructArray.from_arrays([
-            pa.array([[1, 2, 3]], pa.list_(pa.int64())),
-            pa.array([["a", "b", "c"]], pa.list_(pa.binary()))
-        ], ["inner_feature", "x2"])
-    ], ["ragged_feature"])
-    with self.assertRaisesRegex(ValueError,
-                                ".*Unable to handle tensor output.*"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                             {"output": tensor_representation}))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensorWithoutSamePathForPartition(self):
-    tensor_representation = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [
+                        pa.array([[1, 2, 3]], pa.list_(pa.int64())),
+                        pa.array([["a", "b", "c"]], pa.list_(pa.binary())),
+                    ],
+                    ["inner_feature", "x2"],
+                )
+            ],
+            ["ragged_feature"],
+        )
+        with self.assertRaisesRegex(ValueError, ".*Unable to handle tensor output.*"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    record_batch.schema, {"output": tensor_representation}
+                )
+            )
+
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensorWithoutSamePathForPartition(self):
+        tensor_representation = text_format.Parse(
+            """
         ragged_tensor {
           feature_path {
             step: "struct"
@@ -1585,22 +1841,29 @@ def testRaggedTensorWithoutSamePathForPartition(self):
           }
           partition { row_length: "row_length" }
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.StructArray.from_arrays(
-            [pa.array([[1, 2, 3]], pa.list_(pa.int64()))], ["ragged_feature"]),
-        pa.array([[1, 2]], pa.list_(pa.int64()))
-    ], ["struct", "row_length"])
-    with self.assertRaisesRegex(ValueError,
-                                ".*Unable to handle tensor output.*"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                             {"output": tensor_representation}))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensorWithNestedRowLengths(self):
-    tensor_representation = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.StructArray.from_arrays(
+                    [pa.array([[1, 2, 3]], pa.list_(pa.int64()))], ["ragged_feature"]
+                ),
+                pa.array([[1, 2]], pa.list_(pa.int64())),
+            ],
+            ["struct", "row_length"],
+        )
+        with self.assertRaisesRegex(ValueError, ".*Unable to handle tensor output.*"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    record_batch.schema, {"output": tensor_representation}
+                )
+            )
+
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensorWithNestedRowLengths(self):
+        tensor_representation = text_format.Parse(
+            """
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
@@ -1608,85 +1871,115 @@ def testRaggedTensorWithNestedRowLengths(self):
           partition { row_length: "row_length" }
           row_partition_dtype: INT64
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.array([[[1, 1]], None, [[2]], [[3, 3, 4], [5]], []],
-                 type=pa.list_(pa.large_list(pa.int64()))),
-        pa.array([[[2]], None, [[1]], [[2, 1], [1]], []],
-                 type=pa.list_(pa.large_list(pa.int64()))),
-    ], ["ragged_feature", "row_length"])
-    with self.assertRaisesRegex(ValueError,
-                                ".*Unable to handle tensor output.*"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                             {"output": tensor_representation}))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensorWithEmptyFeaturePath(self):
-    tensor_representation = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[[1, 1]], None, [[2]], [[3, 3, 4], [5]], []],
+                    type=pa.list_(pa.large_list(pa.int64())),
+                ),
+                pa.array(
+                    [[[2]], None, [[1]], [[2, 1], [1]], []],
+                    type=pa.list_(pa.large_list(pa.int64())),
+                ),
+            ],
+            ["ragged_feature", "row_length"],
+        )
+        with self.assertRaisesRegex(ValueError, ".*Unable to handle tensor output.*"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    record_batch.schema, {"output": tensor_representation}
+                )
+            )
+
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensorWithEmptyFeaturePath(self):
+        tensor_representation = text_format.Parse(
+            """
         ragged_tensor {
           feature_path { }
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.array([[[1, 1]], None, [[2]], [[3, 3, 4], [5]], []],
-                 type=pa.list_(pa.large_list(pa.int64()))),
-    ], ["ragged_feature"])
-    with self.assertRaisesRegex(ValueError,
-                                ".*Unable to handle tensor output.*"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                             {"output": tensor_representation}))
-
-  @test_util.run_in_graph_and_eager_modes
-  def testRaggedTensorSlicedRecordBatch(self):
-    tensor_representation = text_format.Parse(
-        """
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array(
+                    [[[1, 1]], None, [[2]], [[3, 3, 4], [5]], []],
+                    type=pa.list_(pa.large_list(pa.int64())),
+                ),
+            ],
+            ["ragged_feature"],
+        )
+        with self.assertRaisesRegex(ValueError, ".*Unable to handle tensor output.*"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    record_batch.schema, {"output": tensor_representation}
+                )
+            )
+
+    @test_util.run_in_graph_and_eager_modes
+    def testRaggedTensorSlicedRecordBatch(self):
+        tensor_representation = text_format.Parse(
+            """
         ragged_tensor {
           feature_path {
             step: "ragged_feature"
           }
         }
-        """, schema_pb2.TensorRepresentation())
-    record_batch = pa.RecordBatch.from_arrays(
-        [pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64()))],
-        ["ragged_feature"])
-    record_batch = record_batch.slice(1, 3)
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                           {"output": tensor_representation}))
-    with self.assertRaisesRegex(
-        ValueError,
-        ".*Error raised when handling tensor 'output'"):
-      adapter.ToBatchTensors(record_batch)
-
-  @test_util.run_in_graph_and_eager_modes
-  def testMultipleColumns(self):
-    record_batch = pa.RecordBatch.from_arrays([
-        pa.array([[1], [], [2, 3], None], type=pa.large_list(pa.int64())),
-        pa.array([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]],
-                 type=pa.list_(pa.float32())),
-        pa.array([None, [b"a", b"b"], [b"c", b"d"], None],
-                 type=pa.list_(pa.large_binary())),
-        pa.array([[b"w"], [b"x"], [b"y"], [b"z"]], type=pa.list_(pa.string())),
-    ], [
-        "int64_ragged",
-        "float_dense",
-        "bytes_ragged",
-        "bytes_dense",
-    ])
-
-    tensor_representations = {
-        "int64_varlen_sparse":
-            text_format.Parse(
+        """,
+            schema_pb2.TensorRepresentation(),
+        )
+        record_batch = pa.RecordBatch.from_arrays(
+            [pa.array([[1], None, [2], [3, 4, 5], []], type=pa.list_(pa.int64()))],
+            ["ragged_feature"],
+        )
+        record_batch = record_batch.slice(1, 3)
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema, {"output": tensor_representation}
+            )
+        )
+        with self.assertRaisesRegex(
+            ValueError, ".*Error raised when handling tensor 'output'"
+        ):
+            adapter.ToBatchTensors(record_batch)
+
+    @test_util.run_in_graph_and_eager_modes
+    def testMultipleColumns(self):
+        record_batch = pa.RecordBatch.from_arrays(
+            [
+                pa.array([[1], [], [2, 3], None], type=pa.large_list(pa.int64())),
+                pa.array(
+                    [[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]],
+                    type=pa.list_(pa.float32()),
+                ),
+                pa.array(
+                    [None, [b"a", b"b"], [b"c", b"d"], None],
+                    type=pa.list_(pa.large_binary()),
+                ),
+                pa.array([[b"w"], [b"x"], [b"y"], [b"z"]], type=pa.list_(pa.string())),
+            ],
+            [
+                "int64_ragged",
+                "float_dense",
+                "bytes_ragged",
+                "bytes_dense",
+            ],
+        )
+
+        tensor_representations = {
+            "int64_varlen_sparse": text_format.Parse(
                 """
         varlen_sparse_tensor {
           column_name: "int64_ragged"
         }
-        """, schema_pb2.TensorRepresentation()),
-        "float_dense":
-            text_format.Parse(
+        """,
+                schema_pb2.TensorRepresentation(),
+            ),
+            "float_dense": text_format.Parse(
                 """
         dense_tensor {
           column_name: "float_dense"
@@ -1698,25 +1991,28 @@ def testMultipleColumns(self):
               size: 1
             }
           }
-        }""", schema_pb2.TensorRepresentation()),
-        "bytes_varlen_sparse":
-            text_format.Parse(
+        }""",
+                schema_pb2.TensorRepresentation(),
+            ),
+            "bytes_varlen_sparse": text_format.Parse(
                 """
         varlen_sparse_tensor {
           column_name: "bytes_ragged"
         }
-        """, schema_pb2.TensorRepresentation()),
-        "bytes_dense":
-            text_format.Parse(
+        """,
+                schema_pb2.TensorRepresentation(),
+            ),
+            "bytes_dense": text_format.Parse(
                 """
         dense_tensor {
           column_name: "bytes_dense"
           shape {
           }
         }
-        """, schema_pb2.TensorRepresentation()),
-        "bytes_default_filled_dense":
-            text_format.Parse(
+        """,
+                schema_pb2.TensorRepresentation(),
+            ),
+            "bytes_default_filled_dense": text_format.Parse(
                 """
         dense_tensor {
           column_name: "bytes_ragged"
@@ -1729,139 +2025,175 @@ def testMultipleColumns(self):
             bytes_value: "kk"
           }
         }
-        """, schema_pb2.TensorRepresentation()),
-    }
+        """,
+                schema_pb2.TensorRepresentation(),
+            ),
+        }
 
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(record_batch.schema,
-                                           tensor_representations))
-    type_specs = adapter.TypeSpecs()
-    self.assertEqual(
-        type_specs, {
-            "int64_varlen_sparse":
-                tf.SparseTensorSpec(shape=[None, None], dtype=tf.int64),
-            "bytes_varlen_sparse":
-                tf.SparseTensorSpec(shape=[None, None], dtype=tf.string),
-            "float_dense":
-                tf.TensorSpec(shape=[None, 2, 1], dtype=tf.float32),
-            "bytes_dense":
-                tf.TensorSpec(shape=[None], dtype=tf.string),
-            "bytes_default_filled_dense":
-                tf.TensorSpec(shape=[None, 2], dtype=tf.string),
-        })
-
-    tensors = adapter.ToBatchTensors(record_batch)
-    self.assertLen(tensors, len(type_specs))
-    self.assertSparseAllEqual(
-        tf.SparseTensor(
-            values=tf.constant([1, 2, 3], dtype=tf.int64),
-            dense_shape=tf.constant([4, 2], dtype=tf.int64),
-            indices=tf.constant([[0, 0], [2, 0], [2, 1]], dtype=tf.int64)),
-        tensors["int64_varlen_sparse"])
-    self.assertSparseAllEqual(
-        tf.SparseTensor(
-            values=tf.constant([b"a", b"b", b"c", b"d"]),
-            dense_shape=tf.constant([4, 2], dtype=tf.int64),
-            indices=tf.constant([[1, 0], [1, 1], [2, 0], [2, 1]],
-                                dtype=tf.int64)),
-        tensors["bytes_varlen_sparse"])
-    self.assertAllEqual(
-        tf.constant(
-            [[[1.0], [2.0]], [[2.0], [3.0]], [[3.0], [4.0]], [[4.0], [5.0]]],
-            dtype=tf.float32), tensors["float_dense"])
-    self.assertAllEqual(
-        tf.constant([b"w", b"x", b"y", b"z"]), tensors["bytes_dense"])
-    self.assertAllEqual(
-        tf.constant([[b"kk", b"kk"], [b"a", b"b"], [b"c", b"d"],
-                     [b"kk", b"kk"]]), tensors["bytes_default_filled_dense"])
-
-    if tf.executing_eagerly():
-      for name, spec in type_specs.items():
-        self.assertTrue(
-            spec.is_compatible_with(tensors[name]),
-            "{} is not compatible with spec {}".format(tensors[name], spec))
-
-    self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
-
-  def testRaiseOnUnsupportedTensorRepresentation(self):
-    with self.assertRaisesRegex(ValueError, "Unable to handle tensor"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(
-              pa.schema([pa.field("a", pa.list_(pa.int64()))]),
-              {"tensor": schema_pb2.TensorRepresentation()}))
-
-  def testRaiseOnNoMatchingHandler(self):
-    with self.assertRaisesRegex(ValueError, "Unable to handle tensor"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(
-              # nested lists are not supported now.
-              pa.schema([
-                  pa.field("unsupported_column", pa.list_(pa.list_(pa.int64())))
-              ]),
-              {
-                  "tensor":
-                      text_format.Parse(
-                          """
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                record_batch.schema, tensor_representations
+            )
+        )
+        type_specs = adapter.TypeSpecs()
+        self.assertEqual(
+            type_specs,
+            {
+                "int64_varlen_sparse": tf.SparseTensorSpec(
+                    shape=[None, None], dtype=tf.int64
+                ),
+                "bytes_varlen_sparse": tf.SparseTensorSpec(
+                    shape=[None, None], dtype=tf.string
+                ),
+                "float_dense": tf.TensorSpec(shape=[None, 2, 1], dtype=tf.float32),
+                "bytes_dense": tf.TensorSpec(shape=[None], dtype=tf.string),
+                "bytes_default_filled_dense": tf.TensorSpec(
+                    shape=[None, 2], dtype=tf.string
+                ),
+            },
+        )
+
+        tensors = adapter.ToBatchTensors(record_batch)
+        self.assertLen(tensors, len(type_specs))
+        self.assertSparseAllEqual(
+            tf.SparseTensor(
+                values=tf.constant([1, 2, 3], dtype=tf.int64),
+                dense_shape=tf.constant([4, 2], dtype=tf.int64),
+                indices=tf.constant([[0, 0], [2, 0], [2, 1]], dtype=tf.int64),
+            ),
+            tensors["int64_varlen_sparse"],
+        )
+        self.assertSparseAllEqual(
+            tf.SparseTensor(
+                values=tf.constant([b"a", b"b", b"c", b"d"]),
+                dense_shape=tf.constant([4, 2], dtype=tf.int64),
+                indices=tf.constant([[1, 0], [1, 1], [2, 0], [2, 1]], dtype=tf.int64),
+            ),
+            tensors["bytes_varlen_sparse"],
+        )
+        self.assertAllEqual(
+            tf.constant(
+                [[[1.0], [2.0]], [[2.0], [3.0]], [[3.0], [4.0]], [[4.0], [5.0]]],
+                dtype=tf.float32,
+            ),
+            tensors["float_dense"],
+        )
+        self.assertAllEqual(
+            tf.constant([b"w", b"x", b"y", b"z"]), tensors["bytes_dense"]
+        )
+        self.assertAllEqual(
+            tf.constant([[b"kk", b"kk"], [b"a", b"b"], [b"c", b"d"], [b"kk", b"kk"]]),
+            tensors["bytes_default_filled_dense"],
+        )
+
+        if tf.executing_eagerly():
+            for name, spec in type_specs.items():
+                self.assertTrue(
+                    spec.is_compatible_with(tensors[name]),
+                    f"{tensors[name]} is not compatible with spec {spec}",
+                )
+
+        self.assertAdapterCanProduceNonEagerInEagerMode(adapter, record_batch)
+
+    def testRaiseOnUnsupportedTensorRepresentation(self):
+        with self.assertRaisesRegex(ValueError, "Unable to handle tensor"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    pa.schema([pa.field("a", pa.list_(pa.int64()))]),
+                    {"tensor": schema_pb2.TensorRepresentation()},
+                )
+            )
+
+    def testRaiseOnNoMatchingHandler(self):
+        with self.assertRaisesRegex(ValueError, "Unable to handle tensor"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    # nested lists are not supported now.
+                    pa.schema(
+                        [pa.field("unsupported_column", pa.list_(pa.list_(pa.int64())))]
+                    ),
+                    {
+                        "tensor": text_format.Parse(
+                            """
                   dense_tensor {
                     column_name: "unsupported_column"
                     shape: {}
                   }
-                  """, schema_pb2.TensorRepresentation())
-              }))
-
-  @parameterized.named_parameters(*_INVALID_DEFAULT_VALUE_TEST_CASES)
-  def testRaiseOnInvalidDefaultValue(self, value_type, default_value_pbtxt,
-                                     exception_regexp):
-    tensor_representation = text_format.Parse(
-        """
+                  """,
+                            schema_pb2.TensorRepresentation(),
+                        )
+                    },
+                )
+            )
+
+    @parameterized.named_parameters(*_INVALID_DEFAULT_VALUE_TEST_CASES)
+    def testRaiseOnInvalidDefaultValue(
+        self, value_type, default_value_pbtxt, exception_regexp
+    ):
+        tensor_representation = text_format.Parse(
+            """
                   dense_tensor {
                     column_name: "column"
                     shape {}
-                  }""", schema_pb2.TensorRepresentation())
-    tensor_representation.dense_tensor.default_value.CopyFrom(
-        text_format.Parse(default_value_pbtxt,
-                          schema_pb2.TensorRepresentation.DefaultValue()))
-    with self.assertRaisesRegex(ValueError, exception_regexp):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(
-              pa.schema([pa.field("column", pa.list_(value_type))]),
-              {"tensor": tensor_representation}))
-
-  @parameterized.named_parameters(*_INVALID_SPARSE_TENSOR_TEST_CASES)
-  def testRaiseOnInvalidSparseTensorRepresentation(self,
-                                                   tensor_representation_textpb,
-                                                   arrow_schema):
-    tensor_representation = text_format.Parse(tensor_representation_textpb,
-                                              schema_pb2.TensorRepresentation())
-    with self.assertRaisesRegex(ValueError, "Unable to handle tensor"):
-      tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(
-              pa.schema([pa.field(k, v) for k, v in arrow_schema.items()]),
-              {"tensor": tensor_representation}))
-
-  def testRaiseOnDenseTensorSizeMismatch(self):
-    tensor_representation = text_format.Parse(
-        """
+                  }""",
+            schema_pb2.TensorRepresentation(),
+        )
+        tensor_representation.dense_tensor.default_value.CopyFrom(
+            text_format.Parse(
+                default_value_pbtxt, schema_pb2.TensorRepresentation.DefaultValue()
+            )
+        )
+        with self.assertRaisesRegex(ValueError, exception_regexp):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    pa.schema([pa.field("column", pa.list_(value_type))]),
+                    {"tensor": tensor_representation},
+                )
+            )
+
+    @parameterized.named_parameters(*_INVALID_SPARSE_TENSOR_TEST_CASES)
+    def testRaiseOnInvalidSparseTensorRepresentation(
+        self, tensor_representation_textpb, arrow_schema
+    ):
+        tensor_representation = text_format.Parse(
+            tensor_representation_textpb, schema_pb2.TensorRepresentation()
+        )
+        with self.assertRaisesRegex(ValueError, "Unable to handle tensor"):
+            tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    pa.schema([pa.field(k, v) for k, v in arrow_schema.items()]),
+                    {"tensor": tensor_representation},
+                )
+            )
+
+    def testRaiseOnDenseTensorSizeMismatch(self):
+        tensor_representation = text_format.Parse(
+            """
                   dense_tensor {
                     column_name: "column"
                     shape {}
-                  }""", schema_pb2.TensorRepresentation())
-    with self.assertRaisesRegex(ValueError,
-                                ".*Error raised when handling tensor 'tensor'"):
-      ta = tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(
-              pa.schema([pa.field("column", pa.list_(pa.int64()))]),
-              {"tensor": tensor_representation}))
-      ta.ToBatchTensors(
-          pa.RecordBatch.from_arrays(
-              [pa.array([[1], None, [2]], type=pa.list_(pa.int64()))],
-              ["column"]))
-
-  def testOriginalTypeSpecs(self):
-    arrow_schema = pa.schema([pa.field("column1", pa.list_(pa.int32()))])
-    tensor_representations = {
-        "column1":
-            text_format.Parse(
+                  }""",
+            schema_pb2.TensorRepresentation(),
+        )
+        with self.assertRaisesRegex(
+            ValueError, ".*Error raised when handling tensor 'tensor'"
+        ):
+            ta = tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    pa.schema([pa.field("column", pa.list_(pa.int64()))]),
+                    {"tensor": tensor_representation},
+                )
+            )
+            ta.ToBatchTensors(
+                pa.RecordBatch.from_arrays(
+                    [pa.array([[1], None, [2]], type=pa.list_(pa.int64()))], ["column"]
+                )
+            )
+
+    def testOriginalTypeSpecs(self):
+        arrow_schema = pa.schema([pa.field("column1", pa.list_(pa.int32()))])
+        tensor_representations = {
+            "column1": text_format.Parse(
                 """
                 dense_tensor {
                   column_name: "column1"
@@ -1870,43 +2202,49 @@ def testOriginalTypeSpecs(self):
                       size: 1
                     }
                   }
-                }""", schema_pb2.TensorRepresentation())
-    }
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(arrow_schema,
-                                           tensor_representations))
-    self.assertLen(adapter.TypeSpecs(), 1)
-    self.assertEqual(adapter.TypeSpecs(), adapter.OriginalTypeSpecs())
-
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(
-            arrow_schema,
-            tensor_representations,
-            original_type_specs={
-                "column1": tf.TensorSpec(dtype=tf.int32, shape=[None, 1]),
-                "column2": tf.TensorSpec(dtype=tf.int32, shape=[None, 1])
-            }))
-    self.assertLen(adapter.TypeSpecs(), 1)
-    self.assertLen(adapter.OriginalTypeSpecs(), 2)
-
-    with self.assertRaisesRegex(ValueError,
-                                "original_type_specs must be a superset"):
-      adapter = tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(
-              arrow_schema,
-              tensor_representations,
-              original_type_specs={
-                  # mismatch spec of column1
-                  "column1": tf.TensorSpec(dtype=tf.int64, shape=[None, 1]),
-                  "column2": tf.TensorSpec(dtype=tf.int32, shape=[None, 1])
-              }))
-
-  def testPickleTensorAdapterConfig(self):
-    config = tensor_adapter.TensorAdapterConfig(
-        arrow_schema=pa.schema([pa.field("column1", pa.list_(pa.int32()))]),
-        tensor_representations={
-            "column1":
-                text_format.Parse(
+                }""",
+                schema_pb2.TensorRepresentation(),
+            )
+        }
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(arrow_schema, tensor_representations)
+        )
+        self.assertLen(adapter.TypeSpecs(), 1)
+        self.assertEqual(adapter.TypeSpecs(), adapter.OriginalTypeSpecs())
+
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                arrow_schema,
+                tensor_representations,
+                original_type_specs={
+                    "column1": tf.TensorSpec(dtype=tf.int32, shape=[None, 1]),
+                    "column2": tf.TensorSpec(dtype=tf.int32, shape=[None, 1]),
+                },
+            )
+        )
+        self.assertLen(adapter.TypeSpecs(), 1)
+        self.assertLen(adapter.OriginalTypeSpecs(), 2)
+
+        with self.assertRaisesRegex(
+            ValueError, "original_type_specs must be a superset"
+        ):
+            adapter = tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    arrow_schema,
+                    tensor_representations,
+                    original_type_specs={
+                        # mismatch spec of column1
+                        "column1": tf.TensorSpec(dtype=tf.int64, shape=[None, 1]),
+                        "column2": tf.TensorSpec(dtype=tf.int32, shape=[None, 1]),
+                    },
+                )
+            )
+
+    def testPickleTensorAdapterConfig(self):
+        config = tensor_adapter.TensorAdapterConfig(
+            arrow_schema=pa.schema([pa.field("column1", pa.list_(pa.int32()))]),
+            tensor_representations={
+                "column1": text_format.Parse(
                     """
                 dense_tensor {
                   column_name: "column1"
@@ -1915,19 +2253,24 @@ def testPickleTensorAdapterConfig(self):
                       size: 1
                     }
                   }
-                }""", schema_pb2.TensorRepresentation())
-        },
-        original_type_specs={
-            "column1": tf.TensorSpec(dtype=tf.int32, shape=[None, 1]),
-            "column2": tf.TensorSpec(dtype=tf.int32, shape=[None, 1])
-        })
-    unpickled_config = pickle.loads(pickle.dumps(config))
-    self.assertEqual(config.arrow_schema, unpickled_config.arrow_schema)
-    self.assertEqual(config.tensor_representations,
-                     unpickled_config.tensor_representations)
-    self.assertEqual(config.original_type_specs,
-                     unpickled_config.original_type_specs)
+                }""",
+                    schema_pb2.TensorRepresentation(),
+                )
+            },
+            original_type_specs={
+                "column1": tf.TensorSpec(dtype=tf.int32, shape=[None, 1]),
+                "column2": tf.TensorSpec(dtype=tf.int32, shape=[None, 1]),
+            },
+        )
+        unpickled_config = pickle.loads(pickle.dumps(config))
+        self.assertEqual(config.arrow_schema, unpickled_config.arrow_schema)
+        self.assertEqual(
+            config.tensor_representations, unpickled_config.tensor_representations
+        )
+        self.assertEqual(
+            config.original_type_specs, unpickled_config.original_type_specs
+        )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tfxio/tensor_representation_util.py b/tfx_bsl/tfxio/tensor_representation_util.py
index 0e91b23c..b5ab07ec 100644
--- a/tfx_bsl/tfxio/tensor_representation_util.py
+++ b/tfx_bsl/tfxio/tensor_representation_util.py
@@ -15,34 +15,35 @@
 
 from typing import Dict, Iterable, List, Mapping, Optional, Tuple, Union
 
-from absl import logging
 import numpy as np
 import tensorflow as tf
+from absl import logging
+from tensorflow_metadata.proto.v0 import path_pb2, schema_pb2
+
 from tfx_bsl.arrow import path
 from tfx_bsl.types import common_types
 
-from tensorflow_metadata.proto.v0 import path_pb2
-from tensorflow_metadata.proto.v0 import schema_pb2
-
 _DEFAULT_TENSOR_REPRESENTATION_GROUP = ""
 
 _DISQUALIFYING_LIFECYCLE_STAGES = (
-    schema_pb2.DEPRECATED, schema_pb2.DISABLED, schema_pb2.PLANNED,
-    schema_pb2.ALPHA, schema_pb2.DEBUG_ONLY, schema_pb2.VALIDATION_DERIVED,
+    schema_pb2.DEPRECATED,
+    schema_pb2.DISABLED,
+    schema_pb2.PLANNED,
+    schema_pb2.ALPHA,
+    schema_pb2.DEBUG_ONLY,
+    schema_pb2.VALIDATION_DERIVED,
 )
 
 # The schema proto may not contain this field, which means the legacy logic
 # does not apply.
-_IS_LEGACY_SCHEMA = ("generate_legacy_feature_spec" in
-                     schema_pb2.Schema.DESCRIPTOR.fields_by_name)
+_IS_LEGACY_SCHEMA = (
+    "generate_legacy_feature_spec" in schema_pb2.Schema.DESCRIPTOR.fields_by_name
+)
 
 _LEGACY_DEFAULT_VALUE_FOR_FEATURE_TYPE = {
-    schema_pb2.BYTES:
-        schema_pb2.TensorRepresentation.DefaultValue(bytes_value=b""),
-    schema_pb2.INT:
-        schema_pb2.TensorRepresentation.DefaultValue(int_value=-1),
-    schema_pb2.FLOAT:
-        schema_pb2.TensorRepresentation.DefaultValue(float_value=-1.0),
+    schema_pb2.BYTES: schema_pb2.TensorRepresentation.DefaultValue(bytes_value=b""),
+    schema_pb2.INT: schema_pb2.TensorRepresentation.DefaultValue(int_value=-1),
+    schema_pb2.FLOAT: schema_pb2.TensorRepresentation.DefaultValue(float_value=-1.0),
 }
 
 _FEATURE_TYPE_TO_TF_TYPE = {
@@ -59,742 +60,852 @@
 
 
 def _GetSparseTensorRepresentationUsedColumns(
-    sparse_tensor_rep: schema_pb2.TensorRepresentation.SparseTensor
+    sparse_tensor_rep: schema_pb2.TensorRepresentation.SparseTensor,
 ) -> List[path.ColumnPath]:
-  result = [path.ColumnPath(c) for c in sparse_tensor_rep.index_column_names]
-  if sparse_tensor_rep.HasField("value_column_name"):
-    result.append(path.ColumnPath(sparse_tensor_rep.value_column_name))
-  return result
+    result = [path.ColumnPath(c) for c in sparse_tensor_rep.index_column_names]
+    if sparse_tensor_rep.HasField("value_column_name"):
+        result.append(path.ColumnPath(sparse_tensor_rep.value_column_name))
+    return result
 
 
 def _GetRaggedTensorRepresentationUsedColumns(
-    ragged_tensor_rep: schema_pb2.TensorRepresentation.RaggedTensor
+    ragged_tensor_rep: schema_pb2.TensorRepresentation.RaggedTensor,
 ) -> List[path.ColumnPath]:
-  """Returns a list of ColumnPaths used by the Ragged TensorRepresentation."""
-  value_column_path = path.ColumnPath.from_proto(ragged_tensor_rep.feature_path)
-  result = [value_column_path]
-  for partition in ragged_tensor_rep.partition:
-    if partition.HasField("row_length"):
-      result.append(value_column_path.parent().child(partition.row_length))
-  return result
+    """Returns a list of ColumnPaths used by the Ragged TensorRepresentation."""
+    value_column_path = path.ColumnPath.from_proto(ragged_tensor_rep.feature_path)
+    result = [value_column_path]
+    for partition in ragged_tensor_rep.partition:
+        if partition.HasField("row_length"):
+            result.append(value_column_path.parent().child(partition.row_length))
+    return result
 
 
 _TENSOR_REPRESENTATION_KIND_TO_COLUMNS_GETTER = {
-    "dense_tensor":
-        lambda tr: [path.ColumnPath(tr.dense_tensor.column_name)],
-    "varlen_sparse_tensor":
-        lambda tr: [path.ColumnPath(tr.varlen_sparse_tensor.column_name)],
-    "sparse_tensor":
-        lambda tr: _GetSparseTensorRepresentationUsedColumns(tr.sparse_tensor),
-    "ragged_tensor":
-        lambda tr: _GetRaggedTensorRepresentationUsedColumns(tr.ragged_tensor),
-    None:
-        lambda _: [],
+    "dense_tensor": lambda tr: [path.ColumnPath(tr.dense_tensor.column_name)],
+    "varlen_sparse_tensor": lambda tr: [
+        path.ColumnPath(tr.varlen_sparse_tensor.column_name)
+    ],
+    "sparse_tensor": lambda tr: _GetSparseTensorRepresentationUsedColumns(
+        tr.sparse_tensor
+    ),
+    "ragged_tensor": lambda tr: _GetRaggedTensorRepresentationUsedColumns(
+        tr.ragged_tensor
+    ),
+    None: lambda _: [],
 }
 
 _TENSOR_REPRESENTATION_KIND_TO_VALUE_COLUMN_GETTER = {
-    "dense_tensor":
-        lambda tr: path.ColumnPath(tr.dense_tensor.column_name),
-    "varlen_sparse_tensor":
-        lambda tr: path.ColumnPath(tr.varlen_sparse_tensor.column_name),
-    "sparse_tensor":
-        lambda tr: path.ColumnPath(tr.sparse_tensor.value_column_name),
-    "ragged_tensor":
-        lambda tr: path.ColumnPath.from_proto(tr.ragged_tensor.feature_path)
+    "dense_tensor": lambda tr: path.ColumnPath(tr.dense_tensor.column_name),
+    "varlen_sparse_tensor": lambda tr: path.ColumnPath(
+        tr.varlen_sparse_tensor.column_name
+    ),
+    "sparse_tensor": lambda tr: path.ColumnPath(tr.sparse_tensor.value_column_name),
+    "ragged_tensor": lambda tr: path.ColumnPath.from_proto(
+        tr.ragged_tensor.feature_path
+    ),
 }
 
 
 def SetTensorRepresentationsInSchema(
     schema: schema_pb2.Schema,
     tensor_representations: Mapping[str, schema_pb2.TensorRepresentation],
-    tensor_representation_group_name: str = _DEFAULT_TENSOR_REPRESENTATION_GROUP
+    tensor_representation_group_name: str = _DEFAULT_TENSOR_REPRESENTATION_GROUP,
 ) -> None:
-  """Sets the TensorRepresentationGroup of the given name to the given value."""
-  tensor_representation_map = schema.tensor_representation_group[
-      tensor_representation_group_name].tensor_representation
-  tensor_representation_map.clear()
-  for k, v in tensor_representations.items():
-    tensor_representation_map[k].CopyFrom(v)
+    """Sets the TensorRepresentationGroup of the given name to the given value."""
+    tensor_representation_map = schema.tensor_representation_group[
+        tensor_representation_group_name
+    ].tensor_representation
+    tensor_representation_map.clear()
+    for k, v in tensor_representations.items():
+        tensor_representation_map[k].CopyFrom(v)
 
 
 def GetTensorRepresentationsFromSchema(
     schema: schema_pb2.Schema,
-    tensor_representation_group_name: str = _DEFAULT_TENSOR_REPRESENTATION_GROUP
+    tensor_representation_group_name: str = _DEFAULT_TENSOR_REPRESENTATION_GROUP,
 ) -> Optional[Dict[str, schema_pb2.TensorRepresentation]]:
-  """Gets a TensorRepresentationGroup as a dict<tensor_name,rep> from schema.
+    """Gets a TensorRepresentationGroup as a dict<tensor_name,rep> from schema.
 
-  If the group name is provided, look it up in the schema, otherwise, look for
-  the default group.
+    If the group name is provided, look it up in the schema, otherwise, look for
+    the default group.
 
-  Args:
-    schema: a schema_pb2.Schema.
-    tensor_representation_group_name: (optional) the name of the group to look
-      for. If not provided, look for the default name.
+    Args:
+    ----
+      schema: a schema_pb2.Schema.
+      tensor_representation_group_name: (optional) the name of the group to look
+        for. If not provided, look for the default name.
 
-  Returns:
-    None if not found. Otherwise a dict with tensor names being keys and
-    TensorRepresentation as values.
-  """
-  group = schema.tensor_representation_group.get(
-      tensor_representation_group_name)
-  if group is None:
-    return None
-  return dict(group.tensor_representation)
+    Returns:
+    -------
+      None if not found. Otherwise a dict with tensor names being keys and
+      TensorRepresentation as values.
+    """
+    group = schema.tensor_representation_group.get(tensor_representation_group_name)
+    if group is None:
+        return None
+    return dict(group.tensor_representation)
 
 
 def InferTensorRepresentationsFromSchema(
-    schema: schema_pb2.Schema) -> Dict[str, schema_pb2.TensorRepresentation]:
-  """Infers TensorRepresentations from the schema's Features."""
-  if _ShouldUseLegacyLogic(schema):
-    infer_func = _LegacyInferTensorRepresentationFromSchema
-  else:
-    infer_func = _InferTensorRepresentationFromSchema
+    schema: schema_pb2.Schema,
+) -> Dict[str, schema_pb2.TensorRepresentation]:
+    """Infers TensorRepresentations from the schema's Features."""
+    if _ShouldUseLegacyLogic(schema):
+        infer_func = _LegacyInferTensorRepresentationFromSchema
+    else:
+        infer_func = _InferTensorRepresentationFromSchema
 
-  return infer_func(schema)
+    return infer_func(schema)
 
 
 def InferTensorRepresentationsFromMixedSchema(
-    schema: schema_pb2.Schema) -> Dict[str, schema_pb2.TensorRepresentation]:
-  """Infers TensorRepresentations from schema that has Features and TRs."""
-  tensor_representations = GetTensorRepresentationsFromSchema(schema)
-  inferred_tensor_representations = InferTensorRepresentationsFromSchema(schema)
-  if tensor_representations is None:
-    return inferred_tensor_representations
-  # Only keep inferred TRs that do not represent source columns. Existing TRs
-  # are preferred over the inferred in case of name collisions.
-  source_columns = set()
-  for tensor_representation in tensor_representations.values():
-    source_columns.update(
-        str(path) for path in GetSourceColumnsFromTensorRepresentation(
-            tensor_representation))
-  for name, tensor_representation in inferred_tensor_representations.items():
-    if name in source_columns:
-      # This feature is already used in the explicitly provided TRs.
-      continue
-    if name in tensor_representations:
-      logging.warning(
-          "Feature name %s conflicts with tensor representation name in the "
-          "same schema that does not use the feature as its source column. "
-          "Ignoring the feature and using the tensor representation.", name)
-    else:
-      tensor_representations[name] = tensor_representation
-  return tensor_representations
+    schema: schema_pb2.Schema,
+) -> Dict[str, schema_pb2.TensorRepresentation]:
+    """Infers TensorRepresentations from schema that has Features and TRs."""
+    tensor_representations = GetTensorRepresentationsFromSchema(schema)
+    inferred_tensor_representations = InferTensorRepresentationsFromSchema(schema)
+    if tensor_representations is None:
+        return inferred_tensor_representations
+    # Only keep inferred TRs that do not represent source columns. Existing TRs
+    # are preferred over the inferred in case of name collisions.
+    source_columns = set()
+    for tensor_representation in tensor_representations.values():
+        source_columns.update(
+            str(path)
+            for path in GetSourceColumnsFromTensorRepresentation(tensor_representation)
+        )
+    for name, tensor_representation in inferred_tensor_representations.items():
+        if name in source_columns:
+            # This feature is already used in the explicitly provided TRs.
+            continue
+        if name in tensor_representations:
+            logging.warning(
+                "Feature name %s conflicts with tensor representation name in the "
+                "same schema that does not use the feature as its source column. "
+                "Ignoring the feature and using the tensor representation.",
+                name,
+            )
+        else:
+            tensor_representations[name] = tensor_representation
+    return tensor_representations
 
 
 def GetSourceColumnsFromTensorRepresentation(
-    tensor_representation: schema_pb2.TensorRepresentation
+    tensor_representation: schema_pb2.TensorRepresentation,
 ) -> List[path.ColumnPath]:
-  """Returns columns required by the given TensorRepresentation."""
-
-  return _TENSOR_REPRESENTATION_KIND_TO_COLUMNS_GETTER[
-      tensor_representation.WhichOneof("kind")](
-          tensor_representation)
+    """Returns columns required by the given TensorRepresentation."""
+    return _TENSOR_REPRESENTATION_KIND_TO_COLUMNS_GETTER[
+        tensor_representation.WhichOneof("kind")
+    ](tensor_representation)
 
 
 def GetSourceValueColumnFromTensorRepresentation(
-    tensor_representation: schema_pb2.TensorRepresentation) -> path.ColumnPath:
-  """Returns the column name of value columns from the TensorRepresentation.
+    tensor_representation: schema_pb2.TensorRepresentation,
+) -> path.ColumnPath:
+    """Returns the column name of value columns from the TensorRepresentation.
 
-  Each tensor representation will have one or more value column. A value column
-  is a column that contributes to the values of a (composite) tensor. Certain
-  composite tensor may consists of data from multiple columns, with one
-  providing the values, others providing structural information.
+    Each tensor representation will have one or more value column. A value column
+    is a column that contributes to the values of a (composite) tensor. Certain
+    composite tensor may consists of data from multiple columns, with one
+    providing the values, others providing structural information.
 
-  Args:
-    tensor_representation: The tensor representation that contains tensor
-      construction information.
+    Args:
+    ----
+      tensor_representation: The tensor representation that contains tensor
+        construction information.
 
-  Raises:
-    KeyError: if the tensor representation's "kind" is invalid. Valid "kinds"
-      are dense_tensor, varlen_sparse_tensor, sparse_tensor, or ragged_tensor.
-  """
-  return _TENSOR_REPRESENTATION_KIND_TO_VALUE_COLUMN_GETTER[
-      tensor_representation.WhichOneof("kind")](
-          tensor_representation)
+    Raises:
+    ------
+      KeyError: if the tensor representation's "kind" is invalid. Valid "kinds"
+        are dense_tensor, varlen_sparse_tensor, sparse_tensor, or ragged_tensor.
+    """
+    return _TENSOR_REPRESENTATION_KIND_TO_VALUE_COLUMN_GETTER[
+        tensor_representation.WhichOneof("kind")
+    ](tensor_representation)
 
 
 def CreateTfExampleParserConfig(
     tensor_representation: schema_pb2.TensorRepresentation,
-    feature_type: schema_pb2.FeatureType) -> common_types.FeatureSpecType:
-  """Creates a Feature Configuration that is used for tf.io.parse_example.
-
-  Args:
-    tensor_representation: The tensor representation to convert to a Feature.
-    feature_type: The schema_pb2.FeatureType of the given feature. The supported
-      types are listed in _FEATURE_TYPE_TO_TF_TYPE.
-
-  Returns:
-    Either a `tf.io.FixedLenFeature`, `tf.io.VarLenFeature`,
-      `tf.io.SparseFeature`, or `tf.io.RaggedFeature`.
-
-  Raises:
-    ValueError: If the tensor_representation cannot be converted to a Feature.
-  """
-  value_dtype = _FEATURE_TYPE_TO_TF_TYPE.get(feature_type, None)
-  if value_dtype is None:
-    raise ValueError(
-        "The feature_type: {} is not supported.".format(feature_type))
-
-  tensor_representation_kind = tensor_representation.WhichOneof("kind")
-  if tensor_representation_kind == "dense_tensor":
-    dense_tensor_rep = tensor_representation.dense_tensor
-    shape = _GetDimsFromFixedShape(dense_tensor_rep.shape)
-    default_value = None
-    if dense_tensor_rep.HasField("default_value"):
-      default_value = _GetDefaultValuesList(shape, feature_type,
-                                            dense_tensor_rep.default_value)
-    return tf.io.FixedLenFeature(
-        shape=shape, dtype=value_dtype, default_value=default_value)
-  elif tensor_representation_kind == "varlen_sparse_tensor":
-    return tf.io.VarLenFeature(dtype=value_dtype)
-  elif tensor_representation_kind == "sparse_tensor":
-    sparse_tensor_rep = tensor_representation.sparse_tensor
-    return tf.io.SparseFeature(
-        index_key=list(sparse_tensor_rep.index_column_names),
-        value_key=sparse_tensor_rep.value_column_name,
-        dtype=value_dtype,
-        size=_GetDimsFromFixedShape(sparse_tensor_rep.dense_shape),
-        already_sorted=getattr(sparse_tensor_rep, "already_sorted", False))
-  elif tensor_representation_kind == "ragged_tensor":
-    ragged_tensor_rep = tensor_representation.ragged_tensor
-    if (ragged_tensor_rep.row_partition_dtype ==
-        schema_pb2.TensorRepresentation.RowPartitionDType.INT32):
-      row_splits_dtype = tf.int32
+    feature_type: schema_pb2.FeatureType,
+) -> common_types.FeatureSpecType:
+    """Creates a Feature Configuration that is used for tf.io.parse_example.
+
+    Args:
+    ----
+      tensor_representation: The tensor representation to convert to a Feature.
+      feature_type: The schema_pb2.FeatureType of the given feature. The supported
+        types are listed in _FEATURE_TYPE_TO_TF_TYPE.
+
+    Returns:
+    -------
+      Either a `tf.io.FixedLenFeature`, `tf.io.VarLenFeature`,
+        `tf.io.SparseFeature`, or `tf.io.RaggedFeature`.
+
+    Raises:
+    ------
+      ValueError: If the tensor_representation cannot be converted to a Feature.
+    """
+    value_dtype = _FEATURE_TYPE_TO_TF_TYPE.get(feature_type)
+    if value_dtype is None:
+        raise ValueError(f"The feature_type: {feature_type} is not supported.")
+
+    tensor_representation_kind = tensor_representation.WhichOneof("kind")
+    if tensor_representation_kind == "dense_tensor":
+        dense_tensor_rep = tensor_representation.dense_tensor
+        shape = _GetDimsFromFixedShape(dense_tensor_rep.shape)
+        default_value = None
+        if dense_tensor_rep.HasField("default_value"):
+            default_value = _GetDefaultValuesList(
+                shape, feature_type, dense_tensor_rep.default_value
+            )
+        return tf.io.FixedLenFeature(
+            shape=shape, dtype=value_dtype, default_value=default_value
+        )
+    elif tensor_representation_kind == "varlen_sparse_tensor":
+        return tf.io.VarLenFeature(dtype=value_dtype)
+    elif tensor_representation_kind == "sparse_tensor":
+        sparse_tensor_rep = tensor_representation.sparse_tensor
+        return tf.io.SparseFeature(
+            index_key=list(sparse_tensor_rep.index_column_names),
+            value_key=sparse_tensor_rep.value_column_name,
+            dtype=value_dtype,
+            size=_GetDimsFromFixedShape(sparse_tensor_rep.dense_shape),
+            already_sorted=getattr(sparse_tensor_rep, "already_sorted", False),
+        )
+    elif tensor_representation_kind == "ragged_tensor":
+        ragged_tensor_rep = tensor_representation.ragged_tensor
+        if (
+            ragged_tensor_rep.row_partition_dtype
+            == schema_pb2.TensorRepresentation.RowPartitionDType.INT32
+        ):
+            row_splits_dtype = tf.int32
+        else:
+            row_splits_dtype = tf.int64
+
+        partitions = []
+        if len(ragged_tensor_rep.feature_path.step) > 1:
+            raise ValueError(
+                "Parsing spec from a RaggedTensor with multiple steps in "
+                "feature_path is not implemented."
+            )
+        if not ragged_tensor_rep.feature_path.step:
+            raise ValueError("RaggedTensor representation with empty feature_path.")
+        for partition in ragged_tensor_rep.partition:
+            if partition.HasField("uniform_row_length"):
+                partitions.append(
+                    tf.io.RaggedFeature.UniformRowLength(  # pytype:disable=attribute-error
+                        partition.uniform_row_length
+                    )
+                )
+            elif partition.HasField("row_length"):
+                partitions.append(
+                    tf.io.RaggedFeature.RowLengths(  # pytype:disable=attribute-error
+                        partition.row_length
+                    )
+                )
+            else:
+                raise NotImplementedError(
+                    "RaggedTensor partition type not implemented: {}.".format(
+                        partition.WhichOneof("kind")
+                    )
+                )
+        return tf.io.RaggedFeature(
+            dtype=value_dtype,
+            value_key=ragged_tensor_rep.feature_path.step[0],
+            partitions=partitions,
+            row_splits_dtype=row_splits_dtype,
+        )
     else:
-      row_splits_dtype = tf.int64
-
-    partitions = []
-    if len(ragged_tensor_rep.feature_path.step) > 1:
-      raise ValueError(
-          "Parsing spec from a RaggedTensor with multiple steps in "
-          "feature_path is not implemented.")
-    if not ragged_tensor_rep.feature_path.step:
-      raise ValueError("RaggedTensor representation with empty feature_path.")
-    for partition in ragged_tensor_rep.partition:
-      if partition.HasField("uniform_row_length"):
-        partitions.append(
-            tf.io.RaggedFeature.UniformRowLength(  # pytype:disable=attribute-error
-                partition.uniform_row_length))
-      elif partition.HasField("row_length"):
-        partitions.append(
-            tf.io.RaggedFeature.RowLengths(  # pytype:disable=attribute-error
-                partition.row_length))
-      else:
         raise NotImplementedError(
-            "RaggedTensor partition type not implemented: {}.".format(
-                partition.WhichOneof("kind")))
-    return tf.io.RaggedFeature(
-        dtype=value_dtype,
-        value_key=ragged_tensor_rep.feature_path.step[0],
-        partitions=partitions,
-        row_splits_dtype=row_splits_dtype)
-  else:
-    raise NotImplementedError(
-        "TensorRepresentation: {} is not supported.".format(
-            tensor_representation_kind))
+            f"TensorRepresentation: {tensor_representation_kind} is not supported."
+        )
 
 
 def _GetPrimitiveFeatureTypes(
-    features: List["schema_pb2.Feature"]
+    features: List["schema_pb2.Feature"],
 ) -> Dict[path.ColumnPath, "schema_pb2.FeatureType"]:
-  """Recursively extracts types of all primitive features in the given list."""
-  result = {}
-  for feature in features:
-    if feature.type == schema_pb2.STRUCT:
-      if feature.struct_domain.sparse_feature:
-        raise ValueError(
-            "Struct features with sparse domains are not supported.")
-      for child_path, child_type in _GetPrimitiveFeatureTypes(
-          list(feature.struct_domain.feature)).items():
-        full_path = path.ColumnPath((feature.name,) + child_path.steps())
-        result[full_path] = child_type
-    else:
-      result[path.ColumnPath((feature.name,))] = feature.type
-  return result
+    """Recursively extracts types of all primitive features in the given list."""
+    result = {}
+    for feature in features:
+        if feature.type == schema_pb2.STRUCT:
+            if feature.struct_domain.sparse_feature:
+                raise ValueError(
+                    "Struct features with sparse domains are not supported."
+                )
+            for child_path, child_type in _GetPrimitiveFeatureTypes(
+                list(feature.struct_domain.feature)
+            ).items():
+                full_path = path.ColumnPath((feature.name,) + child_path.steps())
+                result[full_path] = child_type
+        else:
+            result[path.ColumnPath((feature.name,))] = feature.type
+    return result
 
 
 def CreateTfSequenceExampleParserConfig(
-    schema: schema_pb2.Schema
-) -> Tuple[Dict[str, common_types.FeatureSpecType], Dict[
-    str, common_types.FeatureSpecType]]:
-  """Creates feature specs that are used for tf.io.parse_sequence_example.
-
-  Args:
-    schema: A TFMD Schema describing the features that are going to be parsed.
-      The parsing config is generated from `TensorRepresentation`s and therefore
-      the schema is expected to have them for all parsed features. Sequence
-      features are expected to be described in a `StructDomain` of a top-level
-      `STRUCT` sequence feature.
-
-  Returns:
-    A tuple of context and sequence feature spec dictionaries that map tensor
-    names to either `tf.io.FixedLenFeature`, `tf.io.VarLenFeature`,
-    `tf.io.SparseFeature`, or `tf.io.RaggedFeature`.
-
-  Raises:
-    ValueError: If the schema does not contain tensor representations or if the
-      top-level `STRUCT` feature is not valid.
-  """
-  tensor_representations = GetTensorRepresentationsFromSchema(schema)
-  if tensor_representations is None:
-    raise ValueError("TensorRepresentations are required.")
-  column_name_to_type = _GetPrimitiveFeatureTypes(list(schema.feature))
-  context_features = {}
-  sequence_features = {}
-  for name, tensor_representation in tensor_representations.items():
-    value_column = GetSourceValueColumnFromTensorRepresentation(
-        tensor_representation)
-    value_type = column_name_to_type[value_column]
-    if len(value_column) == 1:
-      context_features[name] = CreateTfExampleParserConfig(
-          tensor_representation, value_type)
-    elif len(value_column) == 2:
-      # Only ragged tensor representation supports paths with multiple steps.
-      assert tensor_representation.WhichOneof("kind") == "ragged_tensor", (
-          tensor_representation)
-      sequence_tensor_representation = schema_pb2.TensorRepresentation()
-      sequence_tensor_representation.CopyFrom(tensor_representation)
-      # Now that we know that it's a sequence feature, we put it in the proper
-      # dictionary and pop the sequence feature path.
-      sequence_tensor_representation.ragged_tensor.feature_path.step.pop(0)
-      sequence_features[name] = CreateTfExampleParserConfig(
-          sequence_tensor_representation, value_type)
-    else:
-      raise ValueError(
-          f"Only primitive or struct of primitive features are supported, "
-          f"got {tensor_representation}")
-  return context_features, sequence_features
+    schema: schema_pb2.Schema,
+) -> Tuple[
+    Dict[str, common_types.FeatureSpecType], Dict[str, common_types.FeatureSpecType]
+]:
+    """Creates feature specs that are used for tf.io.parse_sequence_example.
+
+    Args:
+    ----
+      schema: A TFMD Schema describing the features that are going to be parsed.
+        The parsing config is generated from `TensorRepresentation`s and therefore
+        the schema is expected to have them for all parsed features. Sequence
+        features are expected to be described in a `StructDomain` of a top-level
+        `STRUCT` sequence feature.
+
+    Returns:
+    -------
+      A tuple of context and sequence feature spec dictionaries that map tensor
+      names to either `tf.io.FixedLenFeature`, `tf.io.VarLenFeature`,
+      `tf.io.SparseFeature`, or `tf.io.RaggedFeature`.
+
+    Raises:
+    ------
+      ValueError: If the schema does not contain tensor representations or if the
+        top-level `STRUCT` feature is not valid.
+    """
+    tensor_representations = GetTensorRepresentationsFromSchema(schema)
+    if tensor_representations is None:
+        raise ValueError("TensorRepresentations are required.")
+    column_name_to_type = _GetPrimitiveFeatureTypes(list(schema.feature))
+    context_features = {}
+    sequence_features = {}
+    for name, tensor_representation in tensor_representations.items():
+        value_column = GetSourceValueColumnFromTensorRepresentation(
+            tensor_representation
+        )
+        value_type = column_name_to_type[value_column]
+        if len(value_column) == 1:
+            context_features[name] = CreateTfExampleParserConfig(
+                tensor_representation, value_type
+            )
+        elif len(value_column) == 2:
+            # Only ragged tensor representation supports paths with multiple steps.
+            assert (
+                tensor_representation.WhichOneof("kind") == "ragged_tensor"
+            ), tensor_representation
+            sequence_tensor_representation = schema_pb2.TensorRepresentation()
+            sequence_tensor_representation.CopyFrom(tensor_representation)
+            # Now that we know that it's a sequence feature, we put it in the proper
+            # dictionary and pop the sequence feature path.
+            sequence_tensor_representation.ragged_tensor.feature_path.step.pop(0)
+            sequence_features[name] = CreateTfExampleParserConfig(
+                sequence_tensor_representation, value_type
+            )
+        else:
+            raise ValueError(
+                f"Only primitive or struct of primitive features are supported, "
+                f"got {tensor_representation}"
+            )
+    return context_features, sequence_features
 
 
 def _ShouldIncludeFeature(
-    feature: Union[schema_pb2.Feature, schema_pb2.SparseFeature]) -> bool:
-  return not (feature.deprecated or
-              feature.lifecycle_stage in _DISQUALIFYING_LIFECYCLE_STAGES)
+    feature: Union[schema_pb2.Feature, schema_pb2.SparseFeature],
+) -> bool:
+    return not (
+        feature.deprecated or feature.lifecycle_stage in _DISQUALIFYING_LIFECYCLE_STAGES
+    )
 
 
 def _InferTensorRepresentationsFromStruct(
-    feature: schema_pb2.Feature) -> Dict[str, schema_pb2.TensorRepresentation]:
-  """Infers RaggedTensor TensorRepresentations from the given STRUCT feature."""
-  if feature.type != schema_pb2.FeatureType.STRUCT:
-    raise ValueError(
-        f"Expected STRUCT, got {feature.type} for feature {feature.name}.")
-  if feature.struct_domain.sparse_feature:
-    raise NotImplementedError(
-        f"Got sparse features in struct domain of {feature.name}. Struct "
-        "features with sparse domains are not supported.")
-  result = {}
-  struct_path = path.ColumnPath((feature.name,))
-  for child_feature in feature.struct_domain.feature:
-    if child_feature.type == schema_pb2.FeatureType.STRUCT:
-      raise NotImplementedError(
-          f"Got STRUCT feature {child_feature.name} in struct domain of "
-          f"{feature.name}. STRUCT features with multiple levels of nestedness "
-          "are not supported.")
-    child_path = struct_path.child(child_feature.name)
-    result[str(child_path)] = schema_pb2.TensorRepresentation(
-        ragged_tensor=schema_pb2.TensorRepresentation.RaggedTensor(
-            feature_path=child_path.to_proto()))
-  return result
+    feature: schema_pb2.Feature,
+) -> Dict[str, schema_pb2.TensorRepresentation]:
+    """Infers RaggedTensor TensorRepresentations from the given STRUCT feature."""
+    if feature.type != schema_pb2.FeatureType.STRUCT:
+        raise ValueError(
+            f"Expected STRUCT, got {feature.type} for feature {feature.name}."
+        )
+    if feature.struct_domain.sparse_feature:
+        raise NotImplementedError(
+            f"Got sparse features in struct domain of {feature.name}. Struct "
+            "features with sparse domains are not supported."
+        )
+    result = {}
+    struct_path = path.ColumnPath((feature.name,))
+    for child_feature in feature.struct_domain.feature:
+        if child_feature.type == schema_pb2.FeatureType.STRUCT:
+            raise NotImplementedError(
+                f"Got STRUCT feature {child_feature.name} in struct domain of "
+                f"{feature.name}. STRUCT features with multiple levels of nestedness "
+                "are not supported."
+            )
+        child_path = struct_path.child(child_feature.name)
+        result[str(child_path)] = schema_pb2.TensorRepresentation(
+            ragged_tensor=schema_pb2.TensorRepresentation.RaggedTensor(
+                feature_path=child_path.to_proto()
+            )
+        )
+    return result
 
 
 def _MakeVarLenTensorRepresentation(
-    name: str, ragged: bool) -> schema_pb2.TensorRepresentation:
-  """Constructs TensorRepresentation for a variable length feature."""
-  if ragged:
+    name: str, ragged: bool
+) -> schema_pb2.TensorRepresentation:
+    """Constructs TensorRepresentation for a variable length feature."""
+    if ragged:
+        return schema_pb2.TensorRepresentation(
+            ragged_tensor=schema_pb2.TensorRepresentation.RaggedTensor(
+                feature_path=path_pb2.Path(step=[name])
+            )
+        )
     return schema_pb2.TensorRepresentation(
-        ragged_tensor=schema_pb2.TensorRepresentation.RaggedTensor(
-            feature_path=path_pb2.Path(step=[name])))
-  return schema_pb2.TensorRepresentation(
-      varlen_sparse_tensor=schema_pb2.TensorRepresentation.VarLenSparseTensor(
-          column_name=name))
+        varlen_sparse_tensor=schema_pb2.TensorRepresentation.VarLenSparseTensor(
+            column_name=name
+        )
+    )
 
 
 def _InferTensorRepresentationFromSchema(
-    schema: schema_pb2.Schema) -> Dict[str, schema_pb2.TensorRepresentation]:
-  """Translate a Feature proto into a TensorRepresentation proto.
-
-  We apply the following rules:
-    1. If the feature has a fixed shape (set through Feature.shape field),
-       then the feature must always be present (
-       Feature.presence.min_fraction == 1.0), and a DenseTensor representation
-       will be produced for it.
-    2. Otherwise, a VarLenSparseTensor representation will be produced for it.
-
-  Args:
-    schema: a schema_pb2.Schema.
-
-  Returns:
-    A Dict mapping tensor names to their TensorRepresentations.
-
-  Raises:
-    ValueError: if the feature has a fixed shape but is not always present.
-  """
-  result = {}
-  columns_remaining = {f.name: f for f in schema.feature}
-
-  sparse_tensor_repsentations, columns_remaining = (
-      _InferSparseTensorRepresentationsFromSchema(schema, columns_remaining))
-  result.update(sparse_tensor_repsentations)
-
-  for feature in columns_remaining.values():
-    if not _ShouldIncludeFeature(feature):
-      continue
-    if feature.type == schema_pb2.FeatureType.STRUCT:
-      result.update(_InferTensorRepresentationsFromStruct(feature))
-    elif feature.HasField("shape"):
-      if feature.presence.min_fraction != 1:
-        raise ValueError(
-            "Feature {} had shape {} set but min_fraction {} != 1.  Use"
-            " value_count not shape field when min_fraction != 1.".format(
-                feature.name, feature.shape, feature.presence.min_fraction))
-      logging.info("Feature %s has a shape %s. Setting to DenseTensor.",
-                   feature.name, feature.shape)
-      result[feature.name] = schema_pb2.TensorRepresentation(
-          dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
-              column_name=feature.name, shape=feature.shape))
-    else:
-      representation = _MakeVarLenTensorRepresentation(
-          feature.name, schema.represent_variable_length_as_ragged)
-      logging.info("Feature %s has no shape. Setting to %s.", feature.name,
-                   representation.WhichOneof("kind"))
-      result[feature.name] = representation
+    schema: schema_pb2.Schema,
+) -> Dict[str, schema_pb2.TensorRepresentation]:
+    """Translate a Feature proto into a TensorRepresentation proto.
+
+    We apply the following rules:
+      1. If the feature has a fixed shape (set through Feature.shape field),
+         then the feature must always be present (
+         Feature.presence.min_fraction == 1.0), and a DenseTensor representation
+         will be produced for it.
+      2. Otherwise, a VarLenSparseTensor representation will be produced for it.
+
+    Args:
+    ----
+      schema: a schema_pb2.Schema.
+
+    Returns:
+    -------
+      A Dict mapping tensor names to their TensorRepresentations.
+
+    Raises:
+    ------
+      ValueError: if the feature has a fixed shape but is not always present.
+    """
+    result = {}
+    columns_remaining = {f.name: f for f in schema.feature}
+
+    sparse_tensor_repsentations, columns_remaining = (
+        _InferSparseTensorRepresentationsFromSchema(schema, columns_remaining)
+    )
+    result.update(sparse_tensor_repsentations)
+
+    for feature in columns_remaining.values():
+        if not _ShouldIncludeFeature(feature):
+            continue
+        if feature.type == schema_pb2.FeatureType.STRUCT:
+            result.update(_InferTensorRepresentationsFromStruct(feature))
+        elif feature.HasField("shape"):
+            if feature.presence.min_fraction != 1:
+                raise ValueError(
+                    f"Feature {feature.name} had shape {feature.shape} set but min_fraction {feature.presence.min_fraction} != 1.  Use"
+                    " value_count not shape field when min_fraction != 1."
+                )
+            logging.info(
+                "Feature %s has a shape %s. Setting to DenseTensor.",
+                feature.name,
+                feature.shape,
+            )
+            result[feature.name] = schema_pb2.TensorRepresentation(
+                dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
+                    column_name=feature.name, shape=feature.shape
+                )
+            )
+        else:
+            representation = _MakeVarLenTensorRepresentation(
+                feature.name, schema.represent_variable_length_as_ragged
+            )
+            logging.info(
+                "Feature %s has no shape. Setting to %s.",
+                feature.name,
+                representation.WhichOneof("kind"),
+            )
+            result[feature.name] = representation
 
-  return result
+    return result
 
 
 def _InferSparseTensorRepresentationsFromSchema(
     schema: schema_pb2.Schema, columns_remaining: Dict[str, schema_pb2.Feature]
-) -> Tuple[Dict[str, schema_pb2.TensorRepresentation], Dict[
-    str, schema_pb2.Feature]]:
-  """Infers SparseTensor TensorRepresentation from the given schema."""
-  sparse_tensor_representations = {}
-  for sparse_feature in schema.sparse_feature:
-    if not _ShouldIncludeFeature(sparse_feature):
-      continue
-    index_keys = [
-        index_feature.name for index_feature in sparse_feature.index_feature
-    ]
-    index_features = []
-    for index_key in index_keys:
-      try:
-        index_features.append(columns_remaining.pop(index_key))
-      except KeyError:
-        raise ValueError(
-            "sparse_feature {} referred to index feature {} which did not "
-            "exist in the schema".format(sparse_feature.name, index_key))
-
-    value_key = sparse_feature.value_feature.name
-    try:
-      columns_remaining.pop(value_key)
-    except KeyError:
-      raise ValueError(
-          "sparse_feature {} referred to value feature {} which did not "
-          "exist in the schema or was referred to as an index or value multiple "
-          "times.".format(sparse_feature.name, value_key))
-
-    shape = schema_pb2.FixedShape()
-    for index_feature, index_key in zip(index_features, index_keys):
-      if index_feature.HasField("int_domain"):
-        # Currently we only handle O-based INT index features whose minimum
-        # domain value must be zero.
-        if not index_feature.int_domain.HasField("min"):
-          raise ValueError("Cannot determine dense shape of sparse feature "
-                           "{}. The minimum domain value of index feature {}"
-                           " is not set.".format(sparse_feature.name,
-                                                 index_key))
-        if index_feature.int_domain.min != 0:
-          raise ValueError("Only 0-based index features are supported. Sparse "
-                           "feature {} has index feature {} whose minimum "
-                           "domain value is {}.".format(
-                               sparse_feature.name, index_key,
-                               index_feature.int_domain.min))
-
-        if not index_feature.int_domain.HasField("max"):
-          raise ValueError("Cannot determine dense shape of sparse feature "
-                           "{}. The maximum domain value of index feature {}"
-                           " is not set.".format(sparse_feature.name,
-                                                 index_key))
-        shape.dim.add(size=index_feature.int_domain.max + 1)
-      else:
-        # Use -1 to denote unknown dimension size.
-        shape.dim.add(size=-1)
-    # Explicitly set `already_sorted` only when it's set to true in the
-    # `SparseFeature` for backwards compatiblity.
-    sparse_tensor_representations[sparse_feature.name] = (
-        schema_pb2.TensorRepresentation(
-            sparse_tensor=schema_pb2.TensorRepresentation.SparseTensor(
-                dense_shape=shape,
-                index_column_names=index_keys,
-                value_column_name=value_key,
-                already_sorted=sparse_feature.is_sorted or None)))
-
-  return sparse_tensor_representations, columns_remaining
+) -> Tuple[Dict[str, schema_pb2.TensorRepresentation], Dict[str, schema_pb2.Feature]]:
+    """Infers SparseTensor TensorRepresentation from the given schema."""
+    sparse_tensor_representations = {}
+    for sparse_feature in schema.sparse_feature:
+        if not _ShouldIncludeFeature(sparse_feature):
+            continue
+        index_keys = [
+            index_feature.name for index_feature in sparse_feature.index_feature
+        ]
+        index_features = []
+        for index_key in index_keys:
+            try:
+                index_features.append(columns_remaining.pop(index_key))
+            except KeyError:
+                raise ValueError(
+                    f"sparse_feature {sparse_feature.name} referred to index feature {index_key} which did not "
+                    "exist in the schema"
+                )
+
+        value_key = sparse_feature.value_feature.name
+        try:
+            columns_remaining.pop(value_key)
+        except KeyError:
+            raise ValueError(
+                f"sparse_feature {sparse_feature.name} referred to value feature {value_key} which did not "
+                "exist in the schema or was referred to as an index or value multiple "
+                "times."
+            )
+
+        shape = schema_pb2.FixedShape()
+        for index_feature, index_key in zip(index_features, index_keys):
+            if index_feature.HasField("int_domain"):
+                # Currently we only handle O-based INT index features whose minimum
+                # domain value must be zero.
+                if not index_feature.int_domain.HasField("min"):
+                    raise ValueError(
+                        "Cannot determine dense shape of sparse feature "
+                        f"{sparse_feature.name}. The minimum domain value of index feature {index_key}"
+                        " is not set."
+                    )
+                if index_feature.int_domain.min != 0:
+                    raise ValueError(
+                        "Only 0-based index features are supported. Sparse "
+                        f"feature {sparse_feature.name} has index feature {index_key} whose minimum "
+                        f"domain value is {index_feature.int_domain.min}."
+                    )
+
+                if not index_feature.int_domain.HasField("max"):
+                    raise ValueError(
+                        "Cannot determine dense shape of sparse feature "
+                        f"{sparse_feature.name}. The maximum domain value of index feature {index_key}"
+                        " is not set."
+                    )
+                shape.dim.add(size=index_feature.int_domain.max + 1)
+            else:
+                # Use -1 to denote unknown dimension size.
+                shape.dim.add(size=-1)
+        # Explicitly set `already_sorted` only when it's set to true in the
+        # `SparseFeature` for backwards compatiblity.
+        sparse_tensor_representations[sparse_feature.name] = (
+            schema_pb2.TensorRepresentation(
+                sparse_tensor=schema_pb2.TensorRepresentation.SparseTensor(
+                    dense_shape=shape,
+                    index_column_names=index_keys,
+                    value_column_name=value_key,
+                    already_sorted=sparse_feature.is_sorted or None,
+                )
+            )
+        )
+
+    return sparse_tensor_representations, columns_remaining
 
 
 def _ShouldUseLegacyLogic(schema: schema_pb2.Schema) -> bool:
-  if _IS_LEGACY_SCHEMA:
-    return schema.generate_legacy_feature_spec
-  return False
+    if _IS_LEGACY_SCHEMA:
+        return schema.generate_legacy_feature_spec
+    return False
 
 
 def _LegacyInferTensorRepresentationFromSchema(
-    schema: schema_pb2.Schema) -> Dict[str, schema_pb2.TensorRepresentation]:
-  """Translate a Feature proto into a TensorRepresentation proto.
-
-  This function applies heuristics to deduce the shape and other information
-  from a FeatureProto.  The FeatureProto contains information about the feature
-  in an ExampleProto, but the feature spec proto also requires enough
-  information to parse the feature into a tensor.  We apply the following rules:
-
-    1. The shape and representation of the column are determined by the
-       following rules:
-         * if the value_count.min and value_count.max are both 1 then the shape
-           is scalar and the representation is fixed length.
-         * If value_count.min and value_count.max are equal but greater than 1,
-           then the shape is a vector whose length is value_count.max and the
-           representation is fixed length.
-         * If value_count.min and value_count.max are equal and are less than 1,
-           then the shape is a vector of unknown length and the representation
-           is variable length.
-         * If value_count.min and value_count.max are not equal then
-           the shape is a vector of unknown length and the representation is
-           variable length.
-
-    2. If the feature is always present or is variable length (based on the
-        above rule), no default value is set but if the feature is not always
-        present and is fixed length, then a canonical default value is chosen
-        based on _LEGACY_DEFAULT_VALUE_FOR_FEATURE_TYPE.
-
-    3. Features that are deprecated are completely ignored and removed.
-
-  Args:
-    schema: A Schema proto.
-
-  Returns:
-    A Dict mapping tensor names to their TensorRepresentations.
-
-  Raises:
-    ValueError: If the feature's type is not supported or the schema is invalid.
-  """
-  result = {}
-  for feature in schema.feature:
-    if not _ShouldIncludeFeature(feature):
-      continue
-    if feature.type == schema_pb2.FeatureType.STRUCT:
-      result.update(_InferTensorRepresentationsFromStruct(feature))
-      continue
-    # Infer canonical tensorflow dtype.
-    if feature.value_count.min < 0:
-      raise ValueError(
-          "Feature {} has value_count.min < 0 (value was {}).".format(
-              feature.name, feature.value_count.min))
-
-    if feature.value_count.max < 0:
-      raise ValueError(
-          "Feature {} has value_count.max < 0 (value was {}).".format(
-              feature.name, feature.value_count.max))
-
-    # Use heuristics to infer the shape and representation.
-    if (feature.value_count.min == feature.value_count.max and
-        feature.value_count.min == 1):
-      # Case 1: value_count.min == value_count.max == 1.  Infer a DenseTensor
-      # with rank 0 and a default value.
-      logging.info(
-          "Feature %s has value_count.min == value_count.max == 1. Setting to "
-          "DenseTensor.", feature.name)
-      result[feature.name] = schema_pb2.TensorRepresentation(
-          dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
-              column_name=feature.name,
-              shape=schema_pb2.FixedShape(),
-              default_value=_LegacyInferDefaultValue(feature)))
-
-    elif (feature.value_count.min == feature.value_count.max and
-          feature.value_count.min > 1):
-      # Case 2: value_count.min == value_count.max > 1.  Infer a DenseTensor
-      # with rank 1 and a default value.
-      shape = schema_pb2.FixedShape(
-          dim=[schema_pb2.FixedShape.Dim(size=feature.value_count.min)])
-      logging.info(
-          "Feature %s has value_count.min == value_count.max > 1. Setting to "
-          "DenseTensor.", feature.name)
-      result[feature.name] = schema_pb2.TensorRepresentation(
-          dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
-              column_name=feature.name,
-              shape=shape,
-              default_value=_LegacyInferDefaultValue(feature)))
-
-    else:
-      # Case 3: Either value_count.min != value_count.max or
-      # value_count.min == value_count.max == 0.  Infer a VarLenSparseTensor.
-      representation = _MakeVarLenTensorRepresentation(
-          feature.name, schema.represent_variable_length_as_ragged)
-      logging.info(
-          "Feature %s has value_count.min != value_count.max or "
-          "value_count.min == value_count.max == 0. "
-          "Setting to %s.", feature.name, representation.WhichOneof("kind"))
-      result[feature.name] = representation
+    schema: schema_pb2.Schema,
+) -> Dict[str, schema_pb2.TensorRepresentation]:
+    """Translate a Feature proto into a TensorRepresentation proto.
+
+    This function applies heuristics to deduce the shape and other information
+    from a FeatureProto.  The FeatureProto contains information about the feature
+    in an ExampleProto, but the feature spec proto also requires enough
+    information to parse the feature into a tensor.  We apply the following rules:
+
+      1. The shape and representation of the column are determined by the
+         following rules:
+           * if the value_count.min and value_count.max are both 1 then the shape
+             is scalar and the representation is fixed length.
+           * If value_count.min and value_count.max are equal but greater than 1,
+             then the shape is a vector whose length is value_count.max and the
+             representation is fixed length.
+           * If value_count.min and value_count.max are equal and are less than 1,
+             then the shape is a vector of unknown length and the representation
+             is variable length.
+           * If value_count.min and value_count.max are not equal then
+             the shape is a vector of unknown length and the representation is
+             variable length.
+
+      2. If the feature is always present or is variable length (based on the
+          above rule), no default value is set but if the feature is not always
+          present and is fixed length, then a canonical default value is chosen
+          based on _LEGACY_DEFAULT_VALUE_FOR_FEATURE_TYPE.
+
+      3. Features that are deprecated are completely ignored and removed.
+
+    Args:
+    ----
+      schema: A Schema proto.
+
+    Returns:
+    -------
+      A Dict mapping tensor names to their TensorRepresentations.
+
+    Raises:
+    ------
+      ValueError: If the feature's type is not supported or the schema is invalid.
+    """
+    result = {}
+    for feature in schema.feature:
+        if not _ShouldIncludeFeature(feature):
+            continue
+        if feature.type == schema_pb2.FeatureType.STRUCT:
+            result.update(_InferTensorRepresentationsFromStruct(feature))
+            continue
+        # Infer canonical tensorflow dtype.
+        if feature.value_count.min < 0:
+            raise ValueError(
+                f"Feature {feature.name} has value_count.min < 0 (value was {feature.value_count.min})."
+            )
+
+        if feature.value_count.max < 0:
+            raise ValueError(
+                f"Feature {feature.name} has value_count.max < 0 (value was {feature.value_count.max})."
+            )
+
+        # Use heuristics to infer the shape and representation.
+        if (
+            feature.value_count.min == feature.value_count.max
+            and feature.value_count.min == 1
+        ):
+            # Case 1: value_count.min == value_count.max == 1.  Infer a DenseTensor
+            # with rank 0 and a default value.
+            logging.info(
+                "Feature %s has value_count.min == value_count.max == 1. Setting to "
+                "DenseTensor.",
+                feature.name,
+            )
+            result[feature.name] = schema_pb2.TensorRepresentation(
+                dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
+                    column_name=feature.name,
+                    shape=schema_pb2.FixedShape(),
+                    default_value=_LegacyInferDefaultValue(feature),
+                )
+            )
+
+        elif (
+            feature.value_count.min == feature.value_count.max
+            and feature.value_count.min > 1
+        ):
+            # Case 2: value_count.min == value_count.max > 1.  Infer a DenseTensor
+            # with rank 1 and a default value.
+            shape = schema_pb2.FixedShape(
+                dim=[schema_pb2.FixedShape.Dim(size=feature.value_count.min)]
+            )
+            logging.info(
+                "Feature %s has value_count.min == value_count.max > 1. Setting to "
+                "DenseTensor.",
+                feature.name,
+            )
+            result[feature.name] = schema_pb2.TensorRepresentation(
+                dense_tensor=schema_pb2.TensorRepresentation.DenseTensor(
+                    column_name=feature.name,
+                    shape=shape,
+                    default_value=_LegacyInferDefaultValue(feature),
+                )
+            )
+
+        else:
+            # Case 3: Either value_count.min != value_count.max or
+            # value_count.min == value_count.max == 0.  Infer a VarLenSparseTensor.
+            representation = _MakeVarLenTensorRepresentation(
+                feature.name, schema.represent_variable_length_as_ragged
+            )
+            logging.info(
+                "Feature %s has value_count.min != value_count.max or "
+                "value_count.min == value_count.max == 0. "
+                "Setting to %s.",
+                feature.name,
+                representation.WhichOneof("kind"),
+            )
+            result[feature.name] = representation
 
-  return result
+    return result
 
 
 def _LegacyInferDefaultValue(
-    feature_proto: schema_pb2.Feature
+    feature_proto: schema_pb2.Feature,
 ) -> Optional[schema_pb2.TensorRepresentation.DefaultValue]:
-  """Inferrs a default value for a feature."""
-  if feature_proto.presence.min_fraction < 1:
-    default_value = _LEGACY_DEFAULT_VALUE_FOR_FEATURE_TYPE.get(
-        feature_proto.type)
-    if default_value is None:
-      raise ValueError("Unable to infer a default value for feature {}".format(
-          feature_proto))
-    return default_value
-  else:
-    logging.info(
-        "Feature %s has min_fraction = 1 (%s). Not setting default value.",
-        feature_proto.name, feature_proto.presence)
-    return None
+    """Inferrs a default value for a feature."""
+    if feature_proto.presence.min_fraction < 1:
+        default_value = _LEGACY_DEFAULT_VALUE_FOR_FEATURE_TYPE.get(feature_proto.type)
+        if default_value is None:
+            raise ValueError(
+                f"Unable to infer a default value for feature {feature_proto}"
+            )
+        return default_value
+    else:
+        logging.info(
+            "Feature %s has min_fraction = 1 (%s). Not setting default value.",
+            feature_proto.name,
+            feature_proto.presence,
+        )
+        return None
 
 
 def _GetDimsFromFixedShape(shape: schema_pb2.FixedShape) -> List[int]:
-  """Returns a list of dimensions, given a schema_pb2.FixedShape.
+    """Returns a list of dimensions, given a schema_pb2.FixedShape.
 
-  Args:
-    shape: A schema_pb2.FixedShape.
-  """
-  return [dim.size for dim in shape.dim]
+    Args:
+    ----
+      shape: A schema_pb2.FixedShape.
+    """
+    return [dim.size for dim in shape.dim]
 
 
 def _GetDefaultValuesList(
-    unbatched_shape: List[int], feature_type: schema_pb2.FeatureType,
-    default_value_proto: schema_pb2.TensorRepresentation.DefaultValue
+    unbatched_shape: List[int],
+    feature_type: schema_pb2.FeatureType,
+    default_value_proto: schema_pb2.TensorRepresentation.DefaultValue,
 ) -> List[Union[int, float, bytes]]:
-  """Returns a List filled with the default value given in the proto.
+    """Returns a List filled with the default value given in the proto.
+
+    Args:
+    ----
+      unbatched_shape: The shape of the tensor to fill.
+      feature_type: The expected type of the default_value.
+      default_value_proto: The DefaultValue proto that holds the default_value.
+
+    Raises:
+    ------
+      ValueError: if the default_value is incompatible with feature_type.
+    """
+    kind = default_value_proto.WhichOneof("kind")
+    default_value = getattr(default_value_proto, kind)
+    expected_feature_type = _DEFAULT_VALUE_KIND_TO_FEATURE_TYPE.get(kind)
+    if feature_type != expected_feature_type:
+        raise ValueError(
+            f"FeatureType: {schema_pb2.FeatureType.Name(feature_type)} is incompatible with default_value: {default_value}"
+        )
+    size = int(np.prod(unbatched_shape, initial=1))
 
-  Args:
-    unbatched_shape: The shape of the tensor to fill.
-    feature_type: The expected type of the default_value.
-    default_value_proto: The DefaultValue proto that holds the default_value.
+    return [default_value] * size
 
-  Raises:
-    ValueError: if the default_value is incompatible with feature_type.
-  """
-  kind = default_value_proto.WhichOneof("kind")
-  default_value = getattr(default_value_proto, kind)
-  expected_feature_type = _DEFAULT_VALUE_KIND_TO_FEATURE_TYPE.get(kind, None)
-  if feature_type != expected_feature_type:
-    raise ValueError(
-        "FeatureType: {} is incompatible with default_value: {}".format(
-            schema_pb2.FeatureType.Name(feature_type), default_value))
-  size = int(np.prod(unbatched_shape, initial=1))
 
-  return [default_value] * size
+def ProjectTensorRepresentationsInSchema(
+    schema: schema_pb2.Schema, tensor_names: Iterable[str]
+) -> schema_pb2.Schema:
+    """Returns a projection of schema by the given tensor names.
+
+    Tries to extract TensorRpresentations from the schema and infers them in case
+    there's none. The schema is then projected to have the TensorRepresentations
+    and source feature columns of tensors that are present in `tensor_names`.
+
+    Args:
+    ----
+      schema: A TFMD Schema to be projected.
+      tensor_names: Names of tensors that schema must be projected on.
+
+    Returns:
+    -------
+      A schema that contains a subset of TensorRepresentations and features in
+      `schema` that is a set of source columns for the given tensors.
+
+    Raises:
+    ------
+      ValueError: if `schema` doesn't contain any of the given `tensor_names` or
+      TensorRepresentations' source columns are not present in `schema` features.
+    """
+    tensor_representations = GetTensorRepresentationsFromSchema(schema)
+    if tensor_representations is None:
+        tensor_representations = InferTensorRepresentationsFromSchema(schema)
+    tensor_names = set(tensor_names)
+    if not tensor_names.issubset(tensor_representations):
+        raise ValueError(
+            f"Unable to project {tensor_names - tensor_representations.keys()} because they were not in the original "
+            "or inferred TensorRepresentations."
+        )
+    paths = set()
+    for tensor_name in tensor_names:
+        paths.update(
+            GetSourceColumnsFromTensorRepresentation(
+                tensor_representations[tensor_name]
+            )
+        )
+    result = schema_pb2.Schema()
+
+    for feature in schema.feature:
+        feature_path = path.ColumnPath(feature.name)
+        if feature_path in paths:
+            paths.remove(feature_path)
+            result.feature.add().CopyFrom(feature)
+
+    if paths:
+        raise ValueError(
+            f"TensorRepresentations source columns {paths} are not present "
+            "in the schema."
+        )
 
+    SetTensorRepresentationsInSchema(
+        result, {k: v for k, v in tensor_representations.items() if k in tensor_names}
+    )
 
-def ProjectTensorRepresentationsInSchema(
-    schema: schema_pb2.Schema,
-    tensor_names: Iterable[str]) -> schema_pb2.Schema:
-  """Returns a projection of schema by the given tensor names.
-
-  Tries to extract TensorRpresentations from the schema and infers them in case
-  there's none. The schema is then projected to have the TensorRepresentations
-  and source feature columns of tensors that are present in `tensor_names`.
-
-  Args:
-    schema: A TFMD Schema to be projected.
-    tensor_names: Names of tensors that schema must be projected on.
-
-  Returns:
-    A schema that contains a subset of TensorRepresentations and features in
-    `schema` that is a set of source columns for the given tensors.
-
-  Raises:
-    ValueError: if `schema` doesn't contain any of the given `tensor_names` or
-    TensorRepresentations' source columns are not present in `schema` features.
-  """
-  tensor_representations = GetTensorRepresentationsFromSchema(schema)
-  if tensor_representations is None:
-    tensor_representations = InferTensorRepresentationsFromSchema(schema)
-  tensor_names = set(tensor_names)
-  if not tensor_names.issubset(tensor_representations):
-    raise ValueError(
-        "Unable to project {} because they were not in the original "
-        "or inferred TensorRepresentations.".format(
-            tensor_names - tensor_representations.keys()))
-  paths = set()
-  for tensor_name in tensor_names:
-    paths.update(
-        GetSourceColumnsFromTensorRepresentation(
-            tensor_representations[tensor_name]))
-  result = schema_pb2.Schema()
-
-  for feature in schema.feature:
-    feature_path = path.ColumnPath(feature.name)
-    if feature_path in paths:
-      paths.remove(feature_path)
-      result.feature.add().CopyFrom(feature)
-
-  if paths:
-    raise ValueError("TensorRepresentations source columns {} are not present "
-                     "in the schema.".format(paths))
-
-  SetTensorRepresentationsInSchema(
-      result,
-      {k: v for k, v in tensor_representations.items() if k in tensor_names})
-
-  return result
-
-
-def _GetSourceColumnsFromFeature(
-    feature: schema_pb2.Feature) -> List[path.ColumnPath]:
-  """Extracts all Feature paths from a potentially nested Feature."""
-  if feature.type == schema_pb2.FeatureType.STRUCT:
-    result = []
-    for child_feature in feature.struct_domain.feature:
-      result.extend(
-          path.ColumnPath([feature.name] + list(child_path.steps()))
-          for child_path in _GetSourceColumnsFromFeature(child_feature))
     return result
-  else:
-    return [path.ColumnPath(feature.name)]
+
+
+def _GetSourceColumnsFromFeature(feature: schema_pb2.Feature) -> List[path.ColumnPath]:
+    """Extracts all Feature paths from a potentially nested Feature."""
+    if feature.type == schema_pb2.FeatureType.STRUCT:
+        result = []
+        for child_feature in feature.struct_domain.feature:
+            result.extend(
+                path.ColumnPath([feature.name] + list(child_path.steps()))
+                for child_path in _GetSourceColumnsFromFeature(child_feature)
+            )
+        return result
+    else:
+        return [path.ColumnPath(feature.name)]
 
 
 def ValidateTensorRepresentationsInSchema(
     schema: schema_pb2.Schema,
-    tensor_representation_group_name: str = _DEFAULT_TENSOR_REPRESENTATION_GROUP
+    tensor_representation_group_name: str = _DEFAULT_TENSOR_REPRESENTATION_GROUP,
 ):
-  """Checks that TensorRepresentations refer all schema features at least once.
-
-  Args:
-    schema: A TFMD Schema proto.
-    tensor_representation_group_name: (optional) the name of the group to look
-      for. If not provided, looks for the default name.
-
-  Raises:
-    ValueError: If either of the following is true
-      * there's no TensorRepresentationGroup with the given name;
-      * TensorRepresentations refer to a feature that is not in the schema;
-      * feature exists in the schema, but is not referred to by any
-        TensorRepresentation.
-  """
-  tensor_representations = GetTensorRepresentationsFromSchema(
-      schema, tensor_representation_group_name)
-  if tensor_representations is None:
-    raise ValueError(
-        "TensorRepresentations are not found in the schema. Did you specify "
-        "correct group name?")
-  source_features = set()
-  for representation in tensor_representations.values():
-    source_features.update(
-        GetSourceColumnsFromTensorRepresentation(representation))
-  all_features = set()
-  for feature in schema.feature:
-    all_features.update(_GetSourceColumnsFromFeature(feature))
-  source_not_in_schema = source_features - all_features
-  if source_not_in_schema:
-    raise ValueError(
-        f"Features referred in TensorRepresentations but not found in the "
-        f"schema: {source_not_in_schema}")
-  in_schema_not_source = all_features - source_features
-  if in_schema_not_source:
-    raise ValueError(f"Features present in the schema but not referred in any "
-                     f"TensorRepresentation: {in_schema_not_source}")
+    """Checks that TensorRepresentations refer all schema features at least once.
+
+    Args:
+    ----
+      schema: A TFMD Schema proto.
+      tensor_representation_group_name: (optional) the name of the group to look
+        for. If not provided, looks for the default name.
+
+    Raises:
+    ------
+      ValueError: If either of the following is true
+        * there's no TensorRepresentationGroup with the given name;
+        * TensorRepresentations refer to a feature that is not in the schema;
+        * feature exists in the schema, but is not referred to by any
+          TensorRepresentation.
+    """
+    tensor_representations = GetTensorRepresentationsFromSchema(
+        schema, tensor_representation_group_name
+    )
+    if tensor_representations is None:
+        raise ValueError(
+            "TensorRepresentations are not found in the schema. Did you specify "
+            "correct group name?"
+        )
+    source_features = set()
+    for representation in tensor_representations.values():
+        source_features.update(GetSourceColumnsFromTensorRepresentation(representation))
+    all_features = set()
+    for feature in schema.feature:
+        all_features.update(_GetSourceColumnsFromFeature(feature))
+    source_not_in_schema = source_features - all_features
+    if source_not_in_schema:
+        raise ValueError(
+            f"Features referred in TensorRepresentations but not found in the "
+            f"schema: {source_not_in_schema}"
+        )
+    in_schema_not_source = all_features - source_features
+    if in_schema_not_source:
+        raise ValueError(
+            f"Features present in the schema but not referred in any "
+            f"TensorRepresentation: {in_schema_not_source}"
+        )
diff --git a/tfx_bsl/tfxio/tensor_representation_util_test.py b/tfx_bsl/tfxio/tensor_representation_util_test.py
index c50c6c12..8c8efb09 100644
--- a/tfx_bsl/tfxio/tensor_representation_util_test.py
+++ b/tfx_bsl/tfxio/tensor_representation_util_test.py
@@ -15,50 +15,55 @@
 
 import numpy as np
 import tensorflow as tf
+from absl.testing import absltest, parameterized
+from google.protobuf import text_format
+from tensorflow.python.framework import (
+    test_util,  # pylint: disable=g-direct-tensorflow-import
+)
+from tensorflow_metadata.proto.v0 import schema_pb2
 
 from tfx_bsl.arrow import path
 from tfx_bsl.tfxio import tensor_representation_util
 
-from google.protobuf import text_format
-from absl.testing import absltest
-from absl.testing import parameterized
-from tensorflow.python.framework import test_util  # pylint: disable=g-direct-tensorflow-import
-from tensorflow_metadata.proto.v0 import schema_pb2
-
-_IS_LEGACY_SCHEMA = ('generate_legacy_feature_spec' in
-                     schema_pb2.Schema.DESCRIPTOR.fields_by_name)
+_IS_LEGACY_SCHEMA = (
+    "generate_legacy_feature_spec" in schema_pb2.Schema.DESCRIPTOR.fields_by_name
+)
 
 _ALL_EXAMPLE_CODER_TYPES = {
     schema_pb2.FeatureType.INT: tf.int64,
     schema_pb2.FeatureType.FLOAT: tf.float32,
-    schema_pb2.FeatureType.BYTES: tf.string
+    schema_pb2.FeatureType.BYTES: tf.string,
 }
 
 
 def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
-  result = []
-  for stage in tensor_representation_util._DISQUALIFYING_LIFECYCLE_STAGES:
-    stage_name = schema_pb2.LifecycleStage.Name(stage)
-    testcase_name_prefix = 'legacy' if generate_legacy_feature_spec else ''
-    result.append(
-        dict(
-            testcase_name=(
-                f'{testcase_name_prefix}_disqualified_feature_{stage_name}'),
-            ascii_proto=f"""
+    result = []
+    for stage in tensor_representation_util._DISQUALIFYING_LIFECYCLE_STAGES:
+        stage_name = schema_pb2.LifecycleStage.Name(stage)
+        testcase_name_prefix = "legacy" if generate_legacy_feature_spec else ""
+        result.append(
+            dict(
+                testcase_name=(
+                    f"{testcase_name_prefix}_disqualified_feature_{stage_name}"
+                ),
+                ascii_proto=f"""
                        feature {{
                          name: "feature"
                          type: INT
                          lifecycle_stage: {stage_name}
                        }}
                        """,
-            expected={},
-            generate_legacy_feature_spec=generate_legacy_feature_spec))
-  return result
+                expected={},
+                generate_legacy_feature_spec=generate_legacy_feature_spec,
+            )
+        )
+    return result
+
 
 _INFER_TEST_CASES = [
     # Test different shapes
     dict(
-        testcase_name='fixed_len_vector',
+        testcase_name="fixed_len_vector",
         ascii_proto="""
           feature: {
             name: "x" type: INT shape: {dim {size: 1}}
@@ -66,8 +71,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -76,9 +80,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   }
                 }
               }"""
-        }),
+        },
+    ),
     dict(
-        testcase_name='fixed_len_matrix',
+        testcase_name="fixed_len_matrix",
         ascii_proto="""
           feature: {
             name: "x" type: INT shape: {dim {size: 2} dim {size: 2}}
@@ -86,8 +91,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -99,36 +103,37 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   }
                 }
               }"""
-        }),
+        },
+    ),
     dict(
-        testcase_name='var_len',
+        testcase_name="var_len",
         ascii_proto="""feature: {name: "x" type: INT}""",
         expected={
-            'x':
-                """
+            "x": """
               varlen_sparse_tensor {
                 column_name: "x"
               }
             """
-        }),
+        },
+    ),
     dict(
-        testcase_name='var_len_ragged',
+        testcase_name="var_len_ragged",
         ascii_proto="""
           feature: {name: "x" type: INT}
           represent_variable_length_as_ragged: true
           """,
         expected={
-            'x':
-                """
+            "x": """
               ragged_tensor {
                 feature_path {
                   step: "x"
                 }
               }
             """
-        }),
+        },
+    ),
     dict(
-        testcase_name='sparse',
+        testcase_name="sparse",
         ascii_proto="""
           feature {
             name: "index_key"
@@ -145,8 +150,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_feature {name: "value_key"}
           }""",
         expected={
-            'x':
-                """
+            "x": """
               sparse_tensor {
                 index_column_names: ["index_key"]
                 value_column_name: "value_key"
@@ -157,9 +161,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                 }
               }
             """
-        }),
+        },
+    ),
     dict(
-        testcase_name='sparse_already_sorted',
+        testcase_name="sparse_already_sorted",
         ascii_proto="""
           feature {
             name: "index_key"
@@ -177,8 +182,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             is_sorted: true
           }""",
         expected={
-            'x':
-                """
+            "x": """
               sparse_tensor {
                 index_column_names: ["index_key"]
                 value_column_name: "value_key"
@@ -190,9 +194,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                 already_sorted: true
               }
             """
-        }),
+        },
+    ),
     dict(
-        testcase_name='sparse_already_sorted_false',
+        testcase_name="sparse_already_sorted_false",
         ascii_proto="""
           feature {
             name: "value_key"
@@ -204,23 +209,24 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             is_sorted: false
           }""",
         expected={
-            'x':
-                """
+            "x": """
               sparse_tensor {
                 value_column_name: "value_key"
                 dense_shape {
                 }
               }
             """
-        }),
+        },
+    ),
     dict(
-        testcase_name='deprecated_feature',
+        testcase_name="deprecated_feature",
         ascii_proto="""
           feature: {name: "x" type: INT lifecycle_stage: DEPRECATED}
         """,
-        expected={}),
+        expected={},
+    ),
     dict(
-        testcase_name='sparse_feature_rank_0',
+        testcase_name="sparse_feature_rank_0",
         ascii_proto="""
           feature {
             name: "value_key"
@@ -232,16 +238,16 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               sparse_tensor {
                 value_column_name: "value_key"
                 dense_shape { }
               }
             """
-        }),
+        },
+    ),
     dict(
-        testcase_name='sparse_feature_rank_2',
+        testcase_name="sparse_feature_rank_2",
         ascii_proto="""
           feature {
             name: "index_key_1"
@@ -265,8 +271,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               sparse_tensor {
                 index_column_names: "index_key_1"
                 index_column_names: "index_key_2"
@@ -281,9 +286,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                 }
               }
             """
-        }),
+        },
+    ),
     dict(
-        testcase_name='sparse_feature_no_index_int_domain',
+        testcase_name="sparse_feature_no_index_int_domain",
         ascii_proto="""
           feature {
             name: "index_key"
@@ -300,8 +306,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               sparse_tensor {
                 index_column_names: "index_key"
                 value_column_name: "value_key"
@@ -312,9 +317,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                 }
               }
             """
-        }),
+        },
+    ),
     dict(
-        testcase_name='struct',
+        testcase_name="struct",
         ascii_proto="""
           feature {
             name: "##SEQUENCE##"
@@ -339,8 +345,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }""",
         expected={
-            '##SEQUENCE##.int_feature':
-                """
+            "##SEQUENCE##.int_feature": """
                 ragged_tensor {
                   feature_path {
                     step: "##SEQUENCE##"
@@ -348,21 +353,21 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   }
                 }
                 """,
-            '##SEQUENCE##.string_feature':
-                """
+            "##SEQUENCE##.string_feature": """
                 ragged_tensor {
                   feature_path {
                     step: "##SEQUENCE##"
                     step: "string_feature"
                   }
                 }
-                """
-        }),
+                """,
+        },
+    ),
 ] + _DisqualifiedFeatureTestCases()
 
 _LEGACY_INFER_TEST_CASES = [
     dict(
-        testcase_name='fixed_len_scalar_no_default_legacy',
+        testcase_name="fixed_len_scalar_no_default_legacy",
         ascii_proto="""
           feature: {
             name: "dummy" type: INT value_count: {min: 1 max: 1}
@@ -374,8 +379,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'dummy':
-                """
+            "dummy": """
               dense_tensor {
                 column_name: "dummy"
                 shape {
@@ -384,8 +388,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   int_value: -1
                 }
               }""",
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -395,7 +398,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         generate_legacy_feature_spec=True,
     ),
     dict(
-        testcase_name='fixed_len_vector_no_default_legacy',
+        testcase_name="fixed_len_vector_no_default_legacy",
         ascii_proto="""
           feature: {
             name: "dummy" type: INT value_count: {min: 1 max: 1}
@@ -407,8 +410,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'dummy':
-                """
+            "dummy": """
               dense_tensor {
                 column_name: "dummy"
                 shape {
@@ -417,8 +419,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   int_value: -1
                 }
               }""",
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -431,7 +432,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         generate_legacy_feature_spec=True,
     ),
     dict(
-        testcase_name='var_len_legacy',
+        testcase_name="var_len_legacy",
         ascii_proto="""
           feature: {
             name: "dummy" type: INT value_count: {min: 1 max: 1}
@@ -442,8 +443,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'dummy':
-                """
+            "dummy": """
               dense_tensor {
                 column_name: "dummy"
                 shape {
@@ -452,17 +452,16 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   int_value: -1
                 }
               }""",
-            'x':
-                """
+            "x": """
               varlen_sparse_tensor {
                 column_name: "x"
               }
-            """
+            """,
         },
         generate_legacy_feature_spec=True,
     ),
     dict(
-        testcase_name='var_len_ragged_legacy',
+        testcase_name="var_len_ragged_legacy",
         ascii_proto="""
           feature: {
             name: "x" type: INT
@@ -470,8 +469,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           represent_variable_length_as_ragged: true
         """,
         expected={
-            'x':
-                """
+            "x": """
               ragged_tensor {
                 feature_path {
                   step: "x"
@@ -482,7 +480,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         generate_legacy_feature_spec=True,
     ),
     dict(
-        testcase_name='fixed_len_scalar_int_with_default_legacy',
+        testcase_name="fixed_len_scalar_int_with_default_legacy",
         ascii_proto="""
           feature: {
             name: "x" type: INT value_count: {min: 1 max: 1}
@@ -490,8 +488,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -505,7 +502,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         generate_legacy_feature_spec=True,
     ),
     dict(
-        testcase_name='fixed_len_scalar_string_with_default_legacy',
+        testcase_name="fixed_len_scalar_string_with_default_legacy",
         ascii_proto="""
           feature: {
             name: "x" type: BYTES value_count: {min: 1 max: 1}
@@ -513,8 +510,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -528,7 +524,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         generate_legacy_feature_spec=True,
     ),
     dict(
-        testcase_name='fixed_len_scalar_float_with_default_legacy',
+        testcase_name="fixed_len_scalar_float_with_default_legacy",
         ascii_proto="""
           feature: {
             name: "x" type: FLOAT value_count: {min: 1 max: 1}
@@ -536,8 +532,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -551,14 +546,14 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         generate_legacy_feature_spec=True,
     ),
     dict(
-        testcase_name='deprecated_feature_legacy',
+        testcase_name="deprecated_feature_legacy",
         ascii_proto="""
           feature: {name: "x" type: INT deprecated: true}
         """,
         expected={},
     ),
     dict(
-        testcase_name='struct_legacy',
+        testcase_name="struct_legacy",
         ascii_proto="""
           feature {
             name: "##SEQUENCE##"
@@ -583,8 +578,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }""",
         expected={
-            '##SEQUENCE##.int_feature':
-                """
+            "##SEQUENCE##.int_feature": """
                 ragged_tensor {
                   feature_path {
                     step: "##SEQUENCE##"
@@ -592,8 +586,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   }
                 }
                 """,
-            '##SEQUENCE##.string_feature':
-                """
+            "##SEQUENCE##.string_feature": """
                 ragged_tensor {
                   feature_path {
                     step: "##SEQUENCE##"
@@ -608,7 +601,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
 
 _MIXED_SCHEMA_INFER_TEST_CASES = [
     dict(
-        testcase_name='fixed_len_vector_and_tensor_representation',
+        testcase_name="fixed_len_vector_and_tensor_representation",
         ascii_proto="""
           feature: {
             name: "x" type: INT shape: {dim {size: 1}}
@@ -633,8 +626,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -643,17 +635,16 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   }
                 }
               }""",
-            'y':
-                """
+            "y": """
                 ragged_tensor {
                   feature_path { step: "value" }
                 }
-                """
+                """,
         },
         schema_is_mixed=True,
     ),
     dict(
-        testcase_name='tensor_representation_only',
+        testcase_name="tensor_representation_only",
         ascii_proto="""
           tensor_representation_group {
             key: ""
@@ -670,8 +661,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'y':
-                """
+            "y": """
                 ragged_tensor {
                   feature_path { step: "value" }
                 }
@@ -680,7 +670,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         schema_is_mixed=True,
     ),
     dict(
-        testcase_name='feature_and_tensor_representation_same_name',
+        testcase_name="feature_and_tensor_representation_same_name",
         ascii_proto="""
           feature: {
             name: "x" type: INT shape: {dim {size: 1}}
@@ -709,8 +699,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'x':
-                """
+            "x": """
               dense_tensor {
                 column_name: "x"
                 shape {
@@ -719,8 +708,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
                   }
                 }
               }""",
-            'y':
-                """
+            "y": """
                 ragged_tensor {
                   feature_path { step: "value" }
                 }
@@ -729,7 +717,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         schema_is_mixed=True,
     ),
     dict(
-        testcase_name='source_column_and_tensor_representation_same_name',
+        testcase_name="source_column_and_tensor_representation_same_name",
         ascii_proto="""
           feature: {
             name: "y" type: INT shape: {dim {size: 1}}
@@ -750,8 +738,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'y':
-                """
+            "y": """
                 ragged_tensor {
                   feature_path { step: "y" }
                 }
@@ -760,7 +747,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         schema_is_mixed=True,
     ),
     dict(
-        testcase_name='struct_feature',
+        testcase_name="struct_feature",
         ascii_proto="""
           feature {
             name: "int_feature"
@@ -806,21 +793,18 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'int_feature':
-                """ragged_tensor {
+            "int_feature": """ragged_tensor {
                   feature_path {
                     step: "int_feature"
                   }
                 }""",
-            '##SEQUENCE##.string_feature':
-                """ragged_tensor {
+            "##SEQUENCE##.string_feature": """ragged_tensor {
                   feature_path {
                     step: "##SEQUENCE##"
                     step: "string_feature"
                   }
                 }""",
-            '##SEQUENCE##.int_feature':
-                """ragged_tensor {
+            "##SEQUENCE##.int_feature": """ragged_tensor {
                   feature_path {
                     step: "##SEQUENCE##"
                     step: "int_feature"
@@ -830,7 +814,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         schema_is_mixed=True,
     ),
     dict(
-        testcase_name='struct_feature_with_tensor_representations',
+        testcase_name="struct_feature_with_tensor_representations",
         ascii_proto="""
           feature {
             name: "int_feature"
@@ -883,21 +867,18 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         expected={
-            'int_feature':
-                """
+            "int_feature": """
                 varlen_sparse_tensor {
                   column_name: "int_feature"
                 }
                 """,
-            'seq_string_feature':
-                """ragged_tensor {
+            "seq_string_feature": """ragged_tensor {
                   feature_path {
                     step: "##SEQUENCE##"
                     step: "string_feature"
                   }
                 }""",
-            'seq_int_feature':
-                """ragged_tensor {
+            "seq_int_feature": """ragged_tensor {
                   feature_path {
                     step: "##SEQUENCE##"
                     step: "int_feature"
@@ -910,7 +891,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
 
 _INVALID_SCHEMA_INFER_TEST_CASES = [
     dict(
-        testcase_name='sparse_feature_no_index_int_domain_min',
+        testcase_name="sparse_feature_no_index_int_domain_min",
         ascii_proto="""
           feature {
             name: "index_key"
@@ -927,11 +908,14 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_feature {name: "value_key"}
           }
           """,
-        error_msg=(r'Cannot determine dense shape of sparse feature x. '
-                   r'The minimum domain value of index feature index_key'
-                   r' is not set.')),
+        error_msg=(
+            r"Cannot determine dense shape of sparse feature x. "
+            r"The minimum domain value of index feature index_key"
+            r" is not set."
+        ),
+    ),
     dict(
-        testcase_name='sparse_feature_non_zero_index_int_domain_min',
+        testcase_name="sparse_feature_non_zero_index_int_domain_min",
         ascii_proto="""
           feature {
             name: "index_key"
@@ -948,11 +932,14 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_feature {name: "value_key"}
           }
           """,
-        error_msg=(r'Only 0-based index features are supported. Sparse '
-                   r'feature x has index feature index_key whose '
-                   r'minimum domain value is 1')),
+        error_msg=(
+            r"Only 0-based index features are supported. Sparse "
+            r"feature x has index feature index_key whose "
+            r"minimum domain value is 1"
+        ),
+    ),
     dict(
-        testcase_name='sparse_feature_no_index_int_domain_max',
+        testcase_name="sparse_feature_no_index_int_domain_max",
         ascii_proto="""
           feature {
             name: "index_key"
@@ -969,11 +956,14 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_feature {name: "value_key"}
           }
           """,
-        error_msg=(r'Cannot determine dense shape of sparse feature x. '
-                   r'The maximum domain value of index feature index_key'
-                   r' is not set.')),
+        error_msg=(
+            r"Cannot determine dense shape of sparse feature x. "
+            r"The maximum domain value of index feature index_key"
+            r" is not set."
+        ),
+    ),
     dict(
-        testcase_name='sparse_feature_missing_index_key',
+        testcase_name="sparse_feature_missing_index_key",
         ascii_proto="""
           feature {
             name: "value_key"
@@ -986,10 +976,13 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_feature {name: "value_key"}
           }
         """,
-        error_msg=(r'sparse_feature x referred to index feature '
-                   r'index_key which did not exist in the schema')),
+        error_msg=(
+            r"sparse_feature x referred to index feature "
+            r"index_key which did not exist in the schema"
+        ),
+    ),
     dict(
-        testcase_name='sparse_feature_missing_value_key',
+        testcase_name="sparse_feature_missing_value_key",
         ascii_proto="""
           feature {
             name: "index_key"
@@ -1003,18 +996,21 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_feature {name: "value_key"}
           }
         """,
-        error_msg=(r'sparse_feature x referred to value feature '
-                   r'value_key which did not exist in the schema')),
+        error_msg=(
+            r"sparse_feature x referred to value feature "
+            r"value_key which did not exist in the schema"
+        ),
+    ),
 ]
 
 _GET_SOURCE_COLUMNS_TEST_CASES = [
     dict(
-        testcase_name='oneof_unspecified',
-        pbtxt='',
+        testcase_name="oneof_unspecified",
+        pbtxt="",
         expected=[],
     ),
     dict(
-        testcase_name='dense_tensor',
+        testcase_name="dense_tensor",
         pbtxt="""
             dense_tensor {
               column_name: "my_column"
@@ -1022,19 +1018,19 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
               }
             }
         """,
-        expected=[['my_column']],
+        expected=[["my_column"]],
     ),
     dict(
-        testcase_name='varlen_sparse_tensor',
+        testcase_name="varlen_sparse_tensor",
         pbtxt="""
          varlen_sparse_tensor {
            column_name: "my_column"
          }
          """,
-        expected=[['my_column']],
+        expected=[["my_column"]],
     ),
     dict(
-        testcase_name='sparse_tensor',
+        testcase_name="sparse_tensor",
         pbtxt="""
           sparse_tensor {
             index_column_names: "idx1"
@@ -1042,10 +1038,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_column_name: "value"
           }
         """,
-        expected=[['idx1'], ['idx2'], ['value']],
+        expected=[["idx1"], ["idx2"], ["value"]],
     ),
     dict(
-        testcase_name='ragged_tensor',
+        testcase_name="ragged_tensor",
         pbtxt="""
           ragged_tensor {
             feature_path {
@@ -1056,13 +1052,13 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             partition { row_length: "row_length_2" }
           }
         """,
-        expected=[['value'], ['row_length_1'], ['row_length_2']],
+        expected=[["value"], ["row_length_1"], ["row_length_2"]],
     ),
 ]
 
 _GET_SOURCE_VALUE_COLUMNS_TEST_CASES = [
     dict(
-        testcase_name='dense_tensor',
+        testcase_name="dense_tensor",
         pbtxt="""
             dense_tensor {
               column_name: "my_column"
@@ -1070,19 +1066,19 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
               }
             }
         """,
-        expected='my_column',
+        expected="my_column",
     ),
     dict(
-        testcase_name='varlen_sparse_tensor',
+        testcase_name="varlen_sparse_tensor",
         pbtxt="""
          varlen_sparse_tensor {
            column_name: "my_column"
          }
          """,
-        expected='my_column',
+        expected="my_column",
     ),
     dict(
-        testcase_name='sparse_tensor',
+        testcase_name="sparse_tensor",
         pbtxt="""
           sparse_tensor {
             index_column_names: "idx1"
@@ -1090,10 +1086,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_column_name: "value"
           }
         """,
-        expected='value',
+        expected="value",
     ),
     dict(
-        testcase_name='ragged_tensor',
+        testcase_name="ragged_tensor",
         pbtxt="""
           ragged_tensor {
             feature_path {
@@ -1102,13 +1098,13 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             partition { row_length: "row_length" }
           }
         """,
-        expected='my_column',
+        expected="my_column",
     ),
 ]
 
 _PROJECT_TENSOR_REPRESENTATIONS_IN_SCHEMA_TEST_CASES = [
     dict(
-        testcase_name='simple_features',
+        testcase_name="simple_features",
         schema="""
           feature {
             name: "int_feature"
@@ -1125,7 +1121,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             type: BYTES
           }
         """,
-        tensor_names=('int_feature', 'string_feature'),
+        tensor_names=("int_feature", "string_feature"),
         expected_projection="""
           feature {
             name: "int_feature"
@@ -1160,7 +1156,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         """,
     ),
     dict(
-        testcase_name='simple_tensor_representations',
+        testcase_name="simple_tensor_representations",
         schema="""
           feature {
             name: "int_feature"
@@ -1204,7 +1200,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }
         """,
-        tensor_names=('float_feature', 'string_feature'),
+        tensor_names=("float_feature", "string_feature"),
         expected_projection="""
           feature {
             name: "float_feature"
@@ -1238,7 +1234,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         """,
     ),
     dict(
-        testcase_name='composite_tensor_representations',
+        testcase_name="composite_tensor_representations",
         schema="""
           feature {
             name: "value_key"
@@ -1275,7 +1271,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }
         """,
-        tensor_names=('x',),
+        tensor_names=("x",),
         expected_projection="""
           feature {
             name: "value_key"
@@ -1302,7 +1298,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         """,
     ),
     dict(
-        testcase_name='refer_same_source_column',
+        testcase_name="refer_same_source_column",
         schema="""
           feature {
             name: "value_key"
@@ -1335,7 +1331,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }
         """,
-        tensor_names=('x',),
+        tensor_names=("x",),
         expected_projection="""
           feature {
             name: "value_key"
@@ -1359,9 +1355,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
               }
             }
           }
-        """),
+        """,
+    ),
     dict(
-        testcase_name='missing_source_column',
+        testcase_name="missing_source_column",
         schema="""
           feature {
             name: "value_key"
@@ -1394,13 +1391,14 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }
         """,
-        tensor_names=('x',),
+        tensor_names=("x",),
         expected_error=(
-            'TensorRepresentations source columns {index_key} are '
-            'not present in the schema.'),
+            "TensorRepresentations source columns {index_key} are "
+            "not present in the schema."
+        ),
     ),
     dict(
-        testcase_name='missing_tensor_representation',
+        testcase_name="missing_tensor_representation",
         schema="""
           feature {
             name: "int_feature"
@@ -1424,13 +1422,14 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }
         """,
-        tensor_names=('int_feature',),
+        tensor_names=("int_feature",),
         expected_error=(
             "Unable to project {'int_feature'} because they were not in the "
-            'original or inferred TensorRepresentations.'),
+            "original or inferred TensorRepresentations."
+        ),
     ),
     dict(
-        testcase_name='sparse_feature',
+        testcase_name="sparse_feature",
         schema="""
           feature {
             name: "value_key"
@@ -1452,7 +1451,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_feature {name: "value_key"}
           }
         """,
-        tensor_names=('x',),
+        tensor_names=("x",),
         expected_projection="""
           feature {
             name: "value_key"
@@ -1486,9 +1485,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
               }
             }
           }
-        """),
+        """,
+    ),
     dict(
-        testcase_name='sparse_feature_legacy',
+        testcase_name="sparse_feature_legacy",
         schema="""
           feature {
             name: "value_key"
@@ -1510,17 +1510,18 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             value_feature {name: "value_key"}
           }
         """,
-        tensor_names=('x',),
+        tensor_names=("x",),
         expected_error=(
             "Unable to project {'x'} because they were not in the "
-            'original or inferred TensorRepresentations.'
+            "original or inferred TensorRepresentations."
         ),  # sparse_feature inference is not implemented in legacy logic.
-        generate_legacy_feature_spec=True),
+        generate_legacy_feature_spec=True,
+    ),
 ]
 
 _VALIDATE_TENSOR_REPRESENTATIONS_IN_SCHEMA_TEST_CASES = [
     dict(
-        testcase_name='_simple_features',
+        testcase_name="_simple_features",
         schema="""
           feature {
             name: "int_feature"
@@ -1537,10 +1538,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             type: BYTES
           }
         """,
-        error='TensorRepresentations are not found in the schema.',
+        error="TensorRepresentations are not found in the schema.",
     ),
     dict(
-        testcase_name='_simple_tensor_representations',
+        testcase_name="_simple_tensor_representations",
         schema="""
           feature {
             name: "int_feature"
@@ -1586,7 +1587,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         """,
     ),
     dict(
-        testcase_name='_composite_tensor_representations',
+        testcase_name="_composite_tensor_representations",
         schema="""
           feature {
             name: "value_key"
@@ -1623,10 +1624,10 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }
         """,
-        tensor_representation_group_name='group_name',
+        tensor_representation_group_name="group_name",
     ),
     dict(
-        testcase_name='_struct_features',
+        testcase_name="_struct_features",
         schema="""
         feature {
           name: "int_feature"
@@ -1704,7 +1705,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
       """,
     ),
     dict(
-        testcase_name='_tensor_representations_with_sparse_feature',
+        testcase_name="_tensor_representations_with_sparse_feature",
         schema="""
           feature {
             name: "value_key"
@@ -1737,7 +1738,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         """,
     ),
     dict(
-        testcase_name='_refer_same_source_column',
+        testcase_name="_refer_same_source_column",
         schema="""
           feature {
             name: "value_key"
@@ -1772,7 +1773,7 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
         """,
     ),
     dict(
-        testcase_name='_missing_source_column',
+        testcase_name="_missing_source_column",
         schema="""
           feature {
             name: "value_key"
@@ -1806,11 +1807,12 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
           }
         """,
         error=(
-            'Features referred in TensorRepresentations but not found in the '
-            'schema: {index_key}'),
+            "Features referred in TensorRepresentations but not found in the "
+            "schema: {index_key}"
+        ),
     ),
     dict(
-        testcase_name='_missing_tensor_representation',
+        testcase_name="_missing_tensor_representation",
         schema="""
           feature {
             name: "int_feature"
@@ -1834,15 +1836,17 @@ def _DisqualifiedFeatureTestCases(generate_legacy_feature_spec=False):
             }
           }
         """,
-        error=('Features present in the schema but not referred in any '
-               'TensorRepresentation: {int_feature}'),
+        error=(
+            "Features present in the schema but not referred in any "
+            "TensorRepresentation: {int_feature}"
+        ),
     ),
 ]
 
 
 def _MakeFixedLenFeatureTestCases():
-  result = []
-  base_tensor_rep_textpb = """
+    result = []
+    base_tensor_rep_textpb = """
                 dense_tensor {{
                   column_name: "dense_column"
                   shape {{
@@ -1854,43 +1858,42 @@ def _MakeFixedLenFeatureTestCases():
                     {0}
                   }}
                 }}"""
-  for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
-    if t == schema_pb2.FeatureType.FLOAT:
-      default_value_textpb = 'float_value: 1.0'
-      default_values = [1.0, 1.0]
-      expected_parsed_results = np.array([1.0, 1.0])
-    elif t == schema_pb2.FeatureType.INT:
-      default_value_textpb = 'int_value: 1'
-      default_values = [1, 1]
-      expected_parsed_results = np.array([1, 1])
-    elif t == schema_pb2.FeatureType.BYTES:
-      default_value_textpb = 'bytes_value: "default"'
-      default_values = [b'default', b'default']
-      expected_parsed_results = np.array([b'default', b'default'])
+    for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
+        if t == schema_pb2.FeatureType.FLOAT:
+            default_value_textpb = "float_value: 1.0"
+            default_values = [1.0, 1.0]
+            expected_parsed_results = np.array([1.0, 1.0])
+        elif t == schema_pb2.FeatureType.INT:
+            default_value_textpb = "int_value: 1"
+            default_values = [1, 1]
+            expected_parsed_results = np.array([1, 1])
+        elif t == schema_pb2.FeatureType.BYTES:
+            default_value_textpb = 'bytes_value: "default"'
+            default_values = [b"default", b"default"]
+            expected_parsed_results = np.array([b"default", b"default"])
 
-    tensor_rep_textpb = base_tensor_rep_textpb.format(default_value_textpb)
-    expected_feature = tf.io.FixedLenFeature(
-        shape=[2], dtype=dtype, default_value=default_values)
-    result.append({
-        'testcase_name':
-            'FixedLenFeature_{}'.format(schema_pb2.FeatureType.Name(t)),
-        'tensor_representation':
-            tensor_rep_textpb,
-        'feature_type':
-            t,
-        'tf_example':
-            text_format.Parse('', tf.train.Example()).SerializeToString(),
-        'expected_feature':
-            expected_feature,
-        'expected_parsed_results':
-            expected_parsed_results
-    })
-  return result
+        tensor_rep_textpb = base_tensor_rep_textpb.format(default_value_textpb)
+        expected_feature = tf.io.FixedLenFeature(
+            shape=[2], dtype=dtype, default_value=default_values
+        )
+        result.append(
+            {
+                "testcase_name": f"FixedLenFeature_{schema_pb2.FeatureType.Name(t)}",
+                "tensor_representation": tensor_rep_textpb,
+                "feature_type": t,
+                "tf_example": text_format.Parse(
+                    "", tf.train.Example()
+                ).SerializeToString(),
+                "expected_feature": expected_feature,
+                "expected_parsed_results": expected_parsed_results,
+            }
+        )
+    return result
 
 
 def _MakeFixedLenFeatureNoDefaultTestCases():
-  result = []
-  tensor_representation_textpb = """
+    result = []
+    tensor_representation_textpb = """
                 dense_tensor {
                   column_name: "dense_column"
                   shape {
@@ -1899,7 +1902,7 @@ def _MakeFixedLenFeatureNoDefaultTestCases():
                     }
                   }
                 }"""
-  example_textpb = """
+    example_textpb = """
   features {{
     feature {{
       key: "feat"
@@ -1907,72 +1910,67 @@ def _MakeFixedLenFeatureNoDefaultTestCases():
     }}
   }}
   """
-  for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
-    if t == schema_pb2.FeatureType.FLOAT:
-      value_textpb = """float_list { value: [1.0, 2.0, 3.0, 4.0] }"""
-      expected_parsed_results = np.array([1.0, 2.0, 3.0, 4.0])
-    elif t == schema_pb2.FeatureType.INT:
-      value_textpb = """int64_list { value: [1, 2, 3, 4] }"""
-      expected_parsed_results = np.array([1, 2, 3, 4])
-    elif t == schema_pb2.FeatureType.BYTES:
-      value_textpb = """bytes_list { value: ['one', 'two', 'three', 'four'] }"""
-      expected_parsed_results = np.array([b'one', b'two', b'three', b'four'])
-    expected_feature = tf.io.FixedLenFeature(
-        shape=[4], dtype=dtype, default_value=None)
-    result.append({
-        'testcase_name':
-            'FixedLenFeatureNoDefault_{}'.format(
-                schema_pb2.FeatureType.Name(t)),
-        'tensor_representation':
-            tensor_representation_textpb,
-        'feature_type':
-            t,
-        'tf_example':
-            text_format.Parse(
-                example_textpb.format(value_textpb),
-                tf.train.Example()).SerializeToString(),
-        'expected_feature':
-            expected_feature,
-        'expected_parsed_results':
-            expected_parsed_results
-    })
-  return result
+    for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
+        if t == schema_pb2.FeatureType.FLOAT:
+            value_textpb = """float_list { value: [1.0, 2.0, 3.0, 4.0] }"""
+            expected_parsed_results = np.array([1.0, 2.0, 3.0, 4.0])
+        elif t == schema_pb2.FeatureType.INT:
+            value_textpb = """int64_list { value: [1, 2, 3, 4] }"""
+            expected_parsed_results = np.array([1, 2, 3, 4])
+        elif t == schema_pb2.FeatureType.BYTES:
+            value_textpb = """bytes_list { value: ['one', 'two', 'three', 'four'] }"""
+            expected_parsed_results = np.array([b"one", b"two", b"three", b"four"])
+        expected_feature = tf.io.FixedLenFeature(
+            shape=[4], dtype=dtype, default_value=None
+        )
+        result.append(
+            {
+                "testcase_name": f"FixedLenFeatureNoDefault_{schema_pb2.FeatureType.Name(t)}",
+                "tensor_representation": tensor_representation_textpb,
+                "feature_type": t,
+                "tf_example": text_format.Parse(
+                    example_textpb.format(value_textpb), tf.train.Example()
+                ).SerializeToString(),
+                "expected_feature": expected_feature,
+                "expected_parsed_results": expected_parsed_results,
+            }
+        )
+    return result
 
 
 def _MakeVarLenSparseFeatureTestCases():
-  result = []
-  tensor_representation_textpb = """
+    result = []
+    tensor_representation_textpb = """
                  varlen_sparse_tensor {
                    column_name: "varlen_sparse_tensor"
                  }"""
-  if tf.executing_eagerly():
-    sparse_tensor_factory = tf.SparseTensor
-  else:
-    sparse_tensor_factory = tf.compat.v1.SparseTensorValue
-  expected_parsed_results = sparse_tensor_factory(
-      indices=np.zeros((0, 1)), values=np.array([]), dense_shape=[0])
-  for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
-    expected_feature = tf.io.VarLenFeature(dtype=dtype)
-    result.append({
-        'testcase_name':
-            'VarLenSparseFeature_{}'.format(schema_pb2.FeatureType.Name(t)),
-        'tensor_representation':
-            tensor_representation_textpb,
-        'feature_type':
-            t,
-        'tf_example':
-            text_format.Parse('', tf.train.Example()).SerializeToString(),
-        'expected_feature':
-            expected_feature,
-        'expected_parsed_results':
-            expected_parsed_results
-    })
-  return result
+    if tf.executing_eagerly():
+        sparse_tensor_factory = tf.SparseTensor
+    else:
+        sparse_tensor_factory = tf.compat.v1.SparseTensorValue
+    expected_parsed_results = sparse_tensor_factory(
+        indices=np.zeros((0, 1)), values=np.array([]), dense_shape=[0]
+    )
+    for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
+        expected_feature = tf.io.VarLenFeature(dtype=dtype)
+        result.append(
+            {
+                "testcase_name": f"VarLenSparseFeature_{schema_pb2.FeatureType.Name(t)}",
+                "tensor_representation": tensor_representation_textpb,
+                "feature_type": t,
+                "tf_example": text_format.Parse(
+                    "", tf.train.Example()
+                ).SerializeToString(),
+                "expected_feature": expected_feature,
+                "expected_parsed_results": expected_parsed_results,
+            }
+        )
+    return result
 
 
 def _MakeSparseFeatureTestCases():
-  result = []
-  tensor_representation_textpb_template = """
+    result = []
+    tensor_representation_textpb_template = """
                  sparse_tensor {{
                    index_column_names: ["key"]
                    value_column_name: "value"
@@ -1982,51 +1980,52 @@ def _MakeSparseFeatureTestCases():
                      }}
                    }}
                  }}"""
-  if tf.executing_eagerly():
-    sparse_tensor_factory = tf.SparseTensor
-  else:
-    sparse_tensor_factory = tf.compat.v1.SparseTensorValue
-  expected_parsed_results = sparse_tensor_factory(
-      indices=np.zeros((0, 1)), values=np.array([]), dense_shape=[1])
-  for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
-    expected_feature = tf.io.SparseFeature(
-        index_key=['key'], value_key='value', dtype=dtype, size=[1])
-    result.append({
-        'testcase_name':
-            'SparseFeature_{}'.format(schema_pb2.FeatureType.Name(t)),
-        'tensor_representation':
-            tensor_representation_textpb_template.format(dim=1),
-        'feature_type':
-            t,
-        'tf_example':
-            text_format.Parse('', tf.train.Example()).SerializeToString(),
-        'expected_feature':
-            expected_feature,
-        'expected_parsed_results':
-            expected_parsed_results
-    })
-  result.append({
-      'testcase_name':
-          'SparseFeature_unknown_dense_shape',
-      'tensor_representation':
-          tensor_representation_textpb_template.format(dim=-1),
-      'feature_type':
-          schema_pb2.FeatureType.BYTES,
-      'tf_example':
-          text_format.Parse('', tf.train.Example()).SerializeToString(),
-      'expected_feature':
-          tf.io.SparseFeature(
-              index_key=['key'], value_key='value', dtype=tf.string, size=[-1]),
-      'expected_parsed_results':
-          sparse_tensor_factory(
-              indices=np.zeros((0, 1)), values=np.array([]), dense_shape=[-1])
-  })
-  for already_sorted in (True, False):
-    result.append({
-        'testcase_name':
-            f'SparseFeature_already_sorted_{already_sorted}',
-        'tensor_representation':
-            f"""
+    if tf.executing_eagerly():
+        sparse_tensor_factory = tf.SparseTensor
+    else:
+        sparse_tensor_factory = tf.compat.v1.SparseTensorValue
+    expected_parsed_results = sparse_tensor_factory(
+        indices=np.zeros((0, 1)), values=np.array([]), dense_shape=[1]
+    )
+    for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
+        expected_feature = tf.io.SparseFeature(
+            index_key=["key"], value_key="value", dtype=dtype, size=[1]
+        )
+        result.append(
+            {
+                "testcase_name": f"SparseFeature_{schema_pb2.FeatureType.Name(t)}",
+                "tensor_representation": tensor_representation_textpb_template.format(
+                    dim=1
+                ),
+                "feature_type": t,
+                "tf_example": text_format.Parse(
+                    "", tf.train.Example()
+                ).SerializeToString(),
+                "expected_feature": expected_feature,
+                "expected_parsed_results": expected_parsed_results,
+            }
+        )
+    result.append(
+        {
+            "testcase_name": "SparseFeature_unknown_dense_shape",
+            "tensor_representation": tensor_representation_textpb_template.format(
+                dim=-1
+            ),
+            "feature_type": schema_pb2.FeatureType.BYTES,
+            "tf_example": text_format.Parse("", tf.train.Example()).SerializeToString(),
+            "expected_feature": tf.io.SparseFeature(
+                index_key=["key"], value_key="value", dtype=tf.string, size=[-1]
+            ),
+            "expected_parsed_results": sparse_tensor_factory(
+                indices=np.zeros((0, 1)), values=np.array([]), dense_shape=[-1]
+            ),
+        }
+    )
+    for already_sorted in (True, False):
+        result.append(
+            {
+                "testcase_name": f"SparseFeature_already_sorted_{already_sorted}",
+                "tensor_representation": f"""
                   sparse_tensor {{
                     index_column_names: ["key"]
                     value_column_name: "value"
@@ -2037,37 +2036,37 @@ def _MakeSparseFeatureTestCases():
                     }}
                     already_sorted: {'true' if already_sorted else 'false'}
                   }}""",
-        'feature_type':
-            schema_pb2.FeatureType.BYTES,
-        'tf_example':
-            text_format.Parse('', tf.train.Example()).SerializeToString(),
-        'expected_feature':
-            tf.io.SparseFeature(
-                index_key=['key'],
-                value_key='value',
-                dtype=tf.string,
-                size=[1],
-                already_sorted=already_sorted),
-        'expected_parsed_results':
-            expected_parsed_results
-    })
-  return result
+                "feature_type": schema_pb2.FeatureType.BYTES,
+                "tf_example": text_format.Parse(
+                    "", tf.train.Example()
+                ).SerializeToString(),
+                "expected_feature": tf.io.SparseFeature(
+                    index_key=["key"],
+                    value_key="value",
+                    dtype=tf.string,
+                    size=[1],
+                    already_sorted=already_sorted,
+                ),
+                "expected_parsed_results": expected_parsed_results,
+            }
+        )
+    return result
 
 
 def _MakeRaggedFeatureTestCases():
-  result = []
+    result = []
 
-  tensor_representation_textpb = """
+    tensor_representation_textpb = """
     ragged_tensor {
       feature_path { step: "value" }
       partition { row_length: "row_length" }
       row_partition_dtype: INT32
     }"""
-  if tf.executing_eagerly():
-    ragged_tensor_factory = tf.RaggedTensor.from_row_splits
-  else:
-    ragged_tensor_factory = tf.compat.v1.ragged.RaggedTensorValue
-  example_textpb = """
+    if tf.executing_eagerly():
+        ragged_tensor_factory = tf.RaggedTensor.from_row_splits
+    else:
+        ragged_tensor_factory = tf.compat.v1.ragged.RaggedTensorValue
+    example_textpb = """
   features {{
     feature {{
       key: "value"
@@ -2079,53 +2078,54 @@ def _MakeRaggedFeatureTestCases():
     }}
   }}
   """
-  for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
-    if t == schema_pb2.FeatureType.FLOAT:
-      value_textpb = """float_list { value: [1.0, 2.0, 3.0, 4.0] }"""
-      expected_parsed_values = np.array([1.0, 2.0, 3.0, 4.0])
-    elif t == schema_pb2.FeatureType.INT:
-      value_textpb = """int64_list { value: [1, 2, 3, 4] }"""
-      expected_parsed_values = np.array([1, 2, 3, 4])
-    elif t == schema_pb2.FeatureType.BYTES:
-      value_textpb = """bytes_list { value: ['one', 'two', 'three', 'four'] }"""
-      expected_parsed_values = np.array([b'one', b'two', b'three', b'four'])
-    expected_parsed_results = ragged_tensor_factory(
-        values=expected_parsed_values,
-        row_splits=np.array([0, 3, 4], dtype=np.int32))
+    for t, dtype in _ALL_EXAMPLE_CODER_TYPES.items():
+        if t == schema_pb2.FeatureType.FLOAT:
+            value_textpb = """float_list { value: [1.0, 2.0, 3.0, 4.0] }"""
+            expected_parsed_values = np.array([1.0, 2.0, 3.0, 4.0])
+        elif t == schema_pb2.FeatureType.INT:
+            value_textpb = """int64_list { value: [1, 2, 3, 4] }"""
+            expected_parsed_values = np.array([1, 2, 3, 4])
+        elif t == schema_pb2.FeatureType.BYTES:
+            value_textpb = """bytes_list { value: ['one', 'two', 'three', 'four'] }"""
+            expected_parsed_values = np.array([b"one", b"two", b"three", b"four"])
+        expected_parsed_results = ragged_tensor_factory(
+            values=expected_parsed_values,
+            row_splits=np.array([0, 3, 4], dtype=np.int32),
+        )
 
-    expected_feature = tf.io.RaggedFeature(
-        value_key='value',
-        dtype=dtype,
-        partitions=(tf.io.RaggedFeature.RowLengths('row_length'),),
-        row_splits_dtype=tf.int32)
+        expected_feature = tf.io.RaggedFeature(
+            value_key="value",
+            dtype=dtype,
+            partitions=(tf.io.RaggedFeature.RowLengths("row_length"),),
+            row_splits_dtype=tf.int32,
+        )
 
-    result.append({
-        'testcase_name':
-            'RaggedFeature_{}'.format(schema_pb2.FeatureType.Name(t)),
-        'tensor_representation':
-            tensor_representation_textpb,
-        'feature_type':
-            t,
-        'tf_example':
-            text_format.Parse(
-                example_textpb.format(value_textpb),
-                tf.train.Example()).SerializeToString(),
-        'expected_feature':
-            expected_feature,
-        'expected_parsed_results':
-            expected_parsed_results
-    })
-  return result
+        result.append(
+            {
+                "testcase_name": f"RaggedFeature_{schema_pb2.FeatureType.Name(t)}",
+                "tensor_representation": tensor_representation_textpb,
+                "feature_type": t,
+                "tf_example": text_format.Parse(
+                    example_textpb.format(value_textpb), tf.train.Example()
+                ).SerializeToString(),
+                "expected_feature": expected_feature,
+                "expected_parsed_results": expected_parsed_results,
+            }
+        )
+    return result
 
 
-_PARSE_EXAMPLE_TEST_CASES = _MakeFixedLenFeatureTestCases(
-) + _MakeFixedLenFeatureNoDefaultTestCases(
-) + _MakeVarLenSparseFeatureTestCases() + _MakeSparseFeatureTestCases(
-) + _MakeRaggedFeatureTestCases()
+_PARSE_EXAMPLE_TEST_CASES = (
+    _MakeFixedLenFeatureTestCases()
+    + _MakeFixedLenFeatureNoDefaultTestCases()
+    + _MakeVarLenSparseFeatureTestCases()
+    + _MakeSparseFeatureTestCases()
+    + _MakeRaggedFeatureTestCases()
+)
 
 _CREATE_SEQUENCE_EXAMPLE_PARSER_CONFIG_TEST_CASES = [
     dict(
-        testcase_name='_no_struct_feature',
+        testcase_name="_no_struct_feature",
         schema="""
           feature {
             name: "int_feature"
@@ -2145,13 +2145,11 @@ def _MakeRaggedFeatureTestCases():
             }
           }
         """,
-        expected_context_features={
-            'int_feature': tf.io.VarLenFeature(dtype=tf.int64)
-        },
+        expected_context_features={"int_feature": tf.io.VarLenFeature(dtype=tf.int64)},
         expected_sequence_features={},
     ),
     dict(
-        testcase_name='_simple',
+        testcase_name="_simple",
         schema="""
           feature {
             name: "int_feature"
@@ -2220,26 +2218,26 @@ def _MakeRaggedFeatureTestCases():
           }
         """,
         expected_context_features={
-            'int_feature': tf.io.VarLenFeature(dtype=tf.int64),
-            'float_feature': tf.io.VarLenFeature(dtype=tf.float32)
+            "int_feature": tf.io.VarLenFeature(dtype=tf.int64),
+            "float_feature": tf.io.VarLenFeature(dtype=tf.float32),
         },
         expected_sequence_features={
-            'seq_string_feature':
-                tf.io.RaggedFeature(
-                    dtype=tf.string,
-                    value_key='string_feature',
-                    row_splits_dtype=tf.int64,
-                    partitions=[]),
-            'seq_int_feature':
-                tf.io.RaggedFeature(
-                    dtype=tf.int64,
-                    value_key='int_feature',
-                    row_splits_dtype=tf.int64,
-                    partitions=[])
+            "seq_string_feature": tf.io.RaggedFeature(
+                dtype=tf.string,
+                value_key="string_feature",
+                row_splits_dtype=tf.int64,
+                partitions=[],
+            ),
+            "seq_int_feature": tf.io.RaggedFeature(
+                dtype=tf.int64,
+                value_key="int_feature",
+                row_splits_dtype=tf.int64,
+                partitions=[],
+            ),
         },
     ),
     dict(
-        testcase_name='_no_primitive_feature',
+        testcase_name="_no_primitive_feature",
         schema="""
           feature {
             name: "##SEQUENCE##"
@@ -2270,16 +2268,16 @@ def _MakeRaggedFeatureTestCases():
         """,
         expected_context_features={},
         expected_sequence_features={
-            'seq_int_feature':
-                tf.io.RaggedFeature(
-                    dtype=tf.int64,
-                    value_key='int_feature',
-                    row_splits_dtype=tf.int64,
-                    partitions=[])
+            "seq_int_feature": tf.io.RaggedFeature(
+                dtype=tf.int64,
+                value_key="int_feature",
+                row_splits_dtype=tf.int64,
+                partitions=[],
+            )
         },
     ),
     dict(
-        testcase_name='_sparse_feature',
+        testcase_name="_sparse_feature",
         schema="""
           feature {
             name: "index_key"
@@ -2324,69 +2322,77 @@ def _MakeRaggedFeatureTestCases():
         """,
         expected_context_features={},
         expected_sequence_features={
-            'seq_int_feature':
-                tf.io.RaggedFeature(
-                    dtype=tf.int64,
-                    value_key='int_feature',
-                    row_splits_dtype=tf.int64,
-                    partitions=[])
+            "seq_int_feature": tf.io.RaggedFeature(
+                dtype=tf.int64,
+                value_key="int_feature",
+                row_splits_dtype=tf.int64,
+                partitions=[],
+            )
         },
     ),
 ]
 
 
 class TensorRepresentationUtilTest(parameterized.TestCase, tf.test.TestCase):
+    @parameterized.named_parameters(
+        *(_INFER_TEST_CASES + _LEGACY_INFER_TEST_CASES + _MIXED_SCHEMA_INFER_TEST_CASES)
+    )
+    def testInferTensorRepresentationsFromSchema(
+        self,
+        ascii_proto,
+        expected,
+        generate_legacy_feature_spec=False,
+        schema_is_mixed=False,
+    ):
+        if not _IS_LEGACY_SCHEMA and generate_legacy_feature_spec:
+            raise self.skipTest(
+                "This test exersizes legacy inference logic, but the "
+                "schema is not legacy schema."
+            )
+        schema = text_format.Parse(ascii_proto, schema_pb2.Schema())
+        if _IS_LEGACY_SCHEMA:
+            schema.generate_legacy_feature_spec = generate_legacy_feature_spec
+        expected_protos = {
+            k: text_format.Parse(pbtxt, schema_pb2.TensorRepresentation())
+            for k, pbtxt in expected.items()
+        }
+        if not schema_is_mixed:
+            self.assertEqual(
+                expected_protos,
+                tensor_representation_util.InferTensorRepresentationsFromSchema(schema),
+            )
+        self.assertEqual(
+            expected_protos,
+            tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
+                schema
+            ),
+        )
 
-  @parameterized.named_parameters(
-      *(_INFER_TEST_CASES + _LEGACY_INFER_TEST_CASES +
-        _MIXED_SCHEMA_INFER_TEST_CASES))
-  def testInferTensorRepresentationsFromSchema(
-      self,
-      ascii_proto,
-      expected,
-      generate_legacy_feature_spec=False,
-      schema_is_mixed=False):
-    if not _IS_LEGACY_SCHEMA and generate_legacy_feature_spec:
-      raise self.skipTest('This test exersizes legacy inference logic, but the '
-                          'schema is not legacy schema.')
-    schema = text_format.Parse(ascii_proto, schema_pb2.Schema())
-    if _IS_LEGACY_SCHEMA:
-      schema.generate_legacy_feature_spec = generate_legacy_feature_spec
-    expected_protos = {
-        k: text_format.Parse(pbtxt, schema_pb2.TensorRepresentation())
-        for k, pbtxt in expected.items()
-    }
-    if not schema_is_mixed:
-      self.assertEqual(
-          expected_protos,
-          tensor_representation_util.InferTensorRepresentationsFromSchema(
-              schema))
-    self.assertEqual(
-        expected_protos,
-        tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
-            schema))
-
-  @parameterized.named_parameters(*_INVALID_SCHEMA_INFER_TEST_CASES)
-  def testInferTensorRepresentationsFromSchemaInvalidSchema(
-      self, ascii_proto, error_msg, generate_legacy_feature_spec=False):
-    if not _IS_LEGACY_SCHEMA and generate_legacy_feature_spec:
-      raise self.skipTest('This test exersizes legacy inference logic, but the '
-                          'schema is not legacy schema.')
-    schema = text_format.Parse(ascii_proto, schema_pb2.Schema())
-    if _IS_LEGACY_SCHEMA:
-      schema.generate_legacy_feature_spec = generate_legacy_feature_spec
-    with self.assertRaisesRegex(ValueError, error_msg):
-      tensor_representation_util.InferTensorRepresentationsFromSchema(schema)
-    with self.assertRaisesRegex(ValueError, error_msg):
-      tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
-          schema)
+    @parameterized.named_parameters(*_INVALID_SCHEMA_INFER_TEST_CASES)
+    def testInferTensorRepresentationsFromSchemaInvalidSchema(
+        self, ascii_proto, error_msg, generate_legacy_feature_spec=False
+    ):
+        if not _IS_LEGACY_SCHEMA and generate_legacy_feature_spec:
+            raise self.skipTest(
+                "This test exersizes legacy inference logic, but the "
+                "schema is not legacy schema."
+            )
+        schema = text_format.Parse(ascii_proto, schema_pb2.Schema())
+        if _IS_LEGACY_SCHEMA:
+            schema.generate_legacy_feature_spec = generate_legacy_feature_spec
+        with self.assertRaisesRegex(ValueError, error_msg):
+            tensor_representation_util.InferTensorRepresentationsFromSchema(schema)
+        with self.assertRaisesRegex(ValueError, error_msg):
+            tensor_representation_util.InferTensorRepresentationsFromMixedSchema(schema)
 
-  def testGetTensorRepresentationsFromSchema(self):
-    self.assertIsNone(
-        tensor_representation_util.GetTensorRepresentationsFromSchema(
-            schema_pb2.Schema()))
-    schema = text_format.Parse(
-        """
+    def testGetTensorRepresentationsFromSchema(self):
+        self.assertIsNone(
+            tensor_representation_util.GetTensorRepresentationsFromSchema(
+                schema_pb2.Schema()
+            )
+        )
+        schema = text_format.Parse(
+            """
       tensor_representation_group {
         key: ""
         value {
@@ -2396,54 +2402,65 @@ def testGetTensorRepresentationsFromSchema(self):
           }
         }
       }
-    """, schema_pb2.Schema())
-    result = tensor_representation_util.GetTensorRepresentationsFromSchema(
-        schema)
-    self.assertTrue(result)
-    self.assertIn('a', result)
+    """,
+            schema_pb2.Schema(),
+        )
+        result = tensor_representation_util.GetTensorRepresentationsFromSchema(schema)
+        self.assertTrue(result)
+        self.assertIn("a", result)
 
-  @parameterized.named_parameters(*_GET_SOURCE_COLUMNS_TEST_CASES)
-  def testGetSourceColumnsFromTensorRepresentation(self, pbtxt, expected):
-    self.assertEqual(
-        [path.ColumnPath(e) for e in expected],
-        tensor_representation_util.GetSourceColumnsFromTensorRepresentation(
-            text_format.Parse(pbtxt, schema_pb2.TensorRepresentation())))
+    @parameterized.named_parameters(*_GET_SOURCE_COLUMNS_TEST_CASES)
+    def testGetSourceColumnsFromTensorRepresentation(self, pbtxt, expected):
+        self.assertEqual(
+            [path.ColumnPath(e) for e in expected],
+            tensor_representation_util.GetSourceColumnsFromTensorRepresentation(
+                text_format.Parse(pbtxt, schema_pb2.TensorRepresentation())
+            ),
+        )
 
-  @parameterized.named_parameters(*_GET_SOURCE_VALUE_COLUMNS_TEST_CASES)
-  def testGetSourceValueColumnFromTensorRepresentation(self, pbtxt, expected):
-    self.assertEqual(
-        path.ColumnPath(expected),
-        tensor_representation_util.GetSourceValueColumnFromTensorRepresentation(
-            text_format.Parse(pbtxt, schema_pb2.TensorRepresentation())))
+    @parameterized.named_parameters(*_GET_SOURCE_VALUE_COLUMNS_TEST_CASES)
+    def testGetSourceValueColumnFromTensorRepresentation(self, pbtxt, expected):
+        self.assertEqual(
+            path.ColumnPath(expected),
+            tensor_representation_util.GetSourceValueColumnFromTensorRepresentation(
+                text_format.Parse(pbtxt, schema_pb2.TensorRepresentation())
+            ),
+        )
 
-  @test_util.run_all_in_graph_and_eager_modes
-  @parameterized.named_parameters(*_PARSE_EXAMPLE_TEST_CASES)
-  def testCreateTfExampleParserConfig(self, tensor_representation, feature_type,
-                                      tf_example, expected_feature,
-                                      expected_parsed_results):
-    tensor_representation = text_format.Parse(tensor_representation,
-                                              schema_pb2.TensorRepresentation())
-    feature = tensor_representation_util.CreateTfExampleParserConfig(
-        tensor_representation, feature_type)
+    @test_util.run_all_in_graph_and_eager_modes
+    @parameterized.named_parameters(*_PARSE_EXAMPLE_TEST_CASES)
+    def testCreateTfExampleParserConfig(
+        self,
+        tensor_representation,
+        feature_type,
+        tf_example,
+        expected_feature,
+        expected_parsed_results,
+    ):
+        tensor_representation = text_format.Parse(
+            tensor_representation, schema_pb2.TensorRepresentation()
+        )
+        feature = tensor_representation_util.CreateTfExampleParserConfig(
+            tensor_representation, feature_type
+        )
 
-    # Checks that the parser configs are correct.
-    for actual_arg, expected_arg in zip(feature, expected_feature):
-      self.assertAllEqual(actual_arg, expected_arg)
+        # Checks that the parser configs are correct.
+        for actual_arg, expected_arg in zip(feature, expected_feature):
+            self.assertAllEqual(actual_arg, expected_arg)
 
-    # Checks that the parser configs can be used with tf.io.parse_example()
-    actual_tensors = tf.io.parse_single_example(tf_example, {'feat': feature})
-    actual = actual_tensors['feat']
-    if isinstance(actual, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)):
-      self.assertAllEqual(actual.values, expected_parsed_results.values)
-      self.assertAllEqual(actual.indices, expected_parsed_results.indices)
-      self.assertAllEqual(actual.dense_shape,
-                          expected_parsed_results.dense_shape)
-    else:
-      self.assertAllEqual(actual, expected_parsed_results)
+        # Checks that the parser configs can be used with tf.io.parse_example()
+        actual_tensors = tf.io.parse_single_example(tf_example, {"feat": feature})
+        actual = actual_tensors["feat"]
+        if isinstance(actual, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)):
+            self.assertAllEqual(actual.values, expected_parsed_results.values)
+            self.assertAllEqual(actual.indices, expected_parsed_results.indices)
+            self.assertAllEqual(actual.dense_shape, expected_parsed_results.dense_shape)
+        else:
+            self.assertAllEqual(actual, expected_parsed_results)
 
-  def testCreateTfExampleParserConfigInvalidDefaultValue(self):
-    tensor_representation = text_format.Parse(
-        """
+    def testCreateTfExampleParserConfigInvalidDefaultValue(self):
+        tensor_representation = text_format.Parse(
+            """
                 dense_tensor {
                   column_name: "dense_column"
                   shape {
@@ -2454,83 +2471,106 @@ def testCreateTfExampleParserConfigInvalidDefaultValue(self):
                   default_value {
                     int_value: -1
                   }
-                }""", schema_pb2.TensorRepresentation())
-    feature_type = schema_pb2.FLOAT
-    with self.assertRaisesRegex(
-        ValueError, 'FeatureType:.* is incompatible with default_value:.*'):
-      tensor_representation_util.CreateTfExampleParserConfig(
-          tensor_representation, feature_type)
+                }""",
+            schema_pb2.TensorRepresentation(),
+        )
+        feature_type = schema_pb2.FLOAT
+        with self.assertRaisesRegex(
+            ValueError, "FeatureType:.* is incompatible with default_value:.*"
+        ):
+            tensor_representation_util.CreateTfExampleParserConfig(
+                tensor_representation, feature_type
+            )
 
-  def testCreateTfExampleParserConfigRagged(self):
-    feature_type = schema_pb2.INT
-    tensor_representation = text_format.Parse(
-        """
+    def testCreateTfExampleParserConfigRagged(self):
+        feature_type = schema_pb2.INT
+        tensor_representation = text_format.Parse(
+            """
                 ragged_tensor {
                   feature_path {
                     step: "foo"
                     step: "ragged_feature"
                   }
-                }""", schema_pb2.TensorRepresentation())
-    with self.assertRaisesRegex(
-        ValueError, ('Parsing spec from a RaggedTensor with multiple steps in '
-                     'feature_path is not implemented.')):
-      tensor_representation_util.CreateTfExampleParserConfig(
-          tensor_representation, feature_type)
+                }""",
+            schema_pb2.TensorRepresentation(),
+        )
+        with self.assertRaisesRegex(
+            ValueError,
+            (
+                "Parsing spec from a RaggedTensor with multiple steps in "
+                "feature_path is not implemented."
+            ),
+        ):
+            tensor_representation_util.CreateTfExampleParserConfig(
+                tensor_representation, feature_type
+            )
 
-  @parameterized.named_parameters(
-      *(_PROJECT_TENSOR_REPRESENTATIONS_IN_SCHEMA_TEST_CASES))
-  def testProjectTfmdSchema(self,
-                            schema,
-                            tensor_names,
-                            expected_projection=None,
-                            expected_error=None,
-                            generate_legacy_feature_spec=False):
-    schema = text_format.Parse(schema, schema_pb2.Schema())
-    if not _IS_LEGACY_SCHEMA and generate_legacy_feature_spec:
-      raise self.skipTest('This test exersizes legacy inference logic, but the '
-                          'schema is not legacy schema.')
+    @parameterized.named_parameters(
+        *(_PROJECT_TENSOR_REPRESENTATIONS_IN_SCHEMA_TEST_CASES)
+    )
+    def testProjectTfmdSchema(
+        self,
+        schema,
+        tensor_names,
+        expected_projection=None,
+        expected_error=None,
+        generate_legacy_feature_spec=False,
+    ):
+        schema = text_format.Parse(schema, schema_pb2.Schema())
+        if not _IS_LEGACY_SCHEMA and generate_legacy_feature_spec:
+            raise self.skipTest(
+                "This test exersizes legacy inference logic, but the "
+                "schema is not legacy schema."
+            )
 
-    if _IS_LEGACY_SCHEMA:
-      schema.generate_legacy_feature_spec = generate_legacy_feature_spec
-    if expected_error is None:
-      self.assertEqual(
-          text_format.Parse(expected_projection, schema_pb2.Schema()),
-          tensor_representation_util.ProjectTensorRepresentationsInSchema(
-              schema, tensor_names))
-    else:
-      with self.assertRaisesRegex(ValueError, expected_error):
-        _ = tensor_representation_util.ProjectTensorRepresentationsInSchema(
-            schema, tensor_names)
+        if _IS_LEGACY_SCHEMA:
+            schema.generate_legacy_feature_spec = generate_legacy_feature_spec
+        if expected_error is None:
+            self.assertEqual(
+                text_format.Parse(expected_projection, schema_pb2.Schema()),
+                tensor_representation_util.ProjectTensorRepresentationsInSchema(
+                    schema, tensor_names
+                ),
+            )
+        else:
+            with self.assertRaisesRegex(ValueError, expected_error):
+                _ = tensor_representation_util.ProjectTensorRepresentationsInSchema(
+                    schema, tensor_names
+                )
 
-  @parameterized.named_parameters(
-      *(_VALIDATE_TENSOR_REPRESENTATIONS_IN_SCHEMA_TEST_CASES))
-  def testValidateTensorRepresentationsInSchema(
-      self,
-      schema,
-      tensor_representation_group_name=tensor_representation_util
-      ._DEFAULT_TENSOR_REPRESENTATION_GROUP,
-      error=None):
-    schema = text_format.Parse(schema, schema_pb2.Schema())
-    if error is None:
-      tensor_representation_util.ValidateTensorRepresentationsInSchema(
-          schema, tensor_representation_group_name)
-    else:
-      with self.assertRaisesRegex(ValueError, error):
-        tensor_representation_util.ValidateTensorRepresentationsInSchema(
-            schema, tensor_representation_group_name)
+    @parameterized.named_parameters(
+        *(_VALIDATE_TENSOR_REPRESENTATIONS_IN_SCHEMA_TEST_CASES)
+    )
+    def testValidateTensorRepresentationsInSchema(
+        self,
+        schema,
+        tensor_representation_group_name=tensor_representation_util._DEFAULT_TENSOR_REPRESENTATION_GROUP,
+        error=None,
+    ):
+        schema = text_format.Parse(schema, schema_pb2.Schema())
+        if error is None:
+            tensor_representation_util.ValidateTensorRepresentationsInSchema(
+                schema, tensor_representation_group_name
+            )
+        else:
+            with self.assertRaisesRegex(ValueError, error):
+                tensor_representation_util.ValidateTensorRepresentationsInSchema(
+                    schema, tensor_representation_group_name
+                )
 
-  @parameterized.named_parameters(
-      *_CREATE_SEQUENCE_EXAMPLE_PARSER_CONFIG_TEST_CASES)
-  @test_util.run_all_in_graph_and_eager_modes
-  def testCreateTfSequenceExampleParserConfig(self, schema,
-                                              expected_context_features,
-                                              expected_sequence_features):
-    context_features, sequence_features = (
-        tensor_representation_util.CreateTfSequenceExampleParserConfig(
-            text_format.Parse(schema, schema_pb2.Schema())))
-    self.assertDictEqual(expected_context_features, context_features)
-    self.assertDictEqual(expected_sequence_features, sequence_features)
+    @parameterized.named_parameters(*_CREATE_SEQUENCE_EXAMPLE_PARSER_CONFIG_TEST_CASES)
+    @test_util.run_all_in_graph_and_eager_modes
+    def testCreateTfSequenceExampleParserConfig(
+        self, schema, expected_context_features, expected_sequence_features
+    ):
+        context_features, sequence_features = (
+            tensor_representation_util.CreateTfSequenceExampleParserConfig(
+                text_format.Parse(schema, schema_pb2.Schema())
+            )
+        )
+        self.assertDictEqual(expected_context_features, context_features)
+        self.assertDictEqual(expected_sequence_features, sequence_features)
 
 
-if __name__ == '__main__':
-  absltest.main()
+if __name__ == "__main__":
+    absltest.main()
diff --git a/tfx_bsl/tfxio/tensor_to_arrow.py b/tfx_bsl/tfxio/tensor_to_arrow.py
index 7c060ece..37aea8e6 100644
--- a/tfx_bsl/tfxio/tensor_to_arrow.py
+++ b/tfx_bsl/tfxio/tensor_to_arrow.py
@@ -14,516 +14,578 @@
 """Utils to convert TF Tensors or their values to Arrow arrays."""
 
 import abc
-from typing import Dict, List, Tuple, FrozenSet
+from typing import Dict, FrozenSet, List, Tuple
 
 import numpy as np
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.arrow import array_util
-from tfx_bsl.types import common_types
-
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.arrow import array_util
+from tfx_bsl.types import common_types
 
-class TensorsToRecordBatchConverter(object):
-  """Converts a Dict[str, TensorAlike] to a RecordBatch."""
-
-  __slots__ = ["_handlers", "_arrow_schema"]
 
-  class Options(object):
-    """Options to TensorsToRecordBatchConverter."""
+class TensorsToRecordBatchConverter:
+    """Converts a Dict[str, TensorAlike] to a RecordBatch."""
+
+    __slots__ = ["_handlers", "_arrow_schema"]
+
+    class Options:
+        """Options to TensorsToRecordBatchConverter."""
+
+        def __init__(
+            self,
+            sparse_tensor_value_column_name_template: str = "{tensor_name}$values",
+            sparse_tensor_index_column_name_template: str = "{tensor_name}$index{index}",
+            generic_sparse_tensor_names: FrozenSet[str] = frozenset(),
+        ):
+            """Initialzier.
+
+            Args:
+            ----
+              sparse_tensor_value_column_name_template: a `str.format()` template
+                for the column name for the values component of a generic
+                SparseTensor. This template should contain a "{tensor_name}" token.
+              sparse_tensor_index_column_name_template: a `str.format()` template
+                for the column name for the sparse index components of a generic
+                SparseTensor. This template should contain a "{tensor_name}" token
+                and an "{index}" token.
+              generic_sparse_tensor_names: a set of SparseTensor names that must be
+                converted as generic SparseTensors. Its purpose is to disambiguate
+                2-D varlen and 2-D generic SparseTensors. It is not necessary to
+                include names of >2-D SparseTensors since they can only be handled as
+                generic SparseTensors.
+            """
+            self.sparse_tensor_value_column_name_template = (
+                sparse_tensor_value_column_name_template
+            )
+            self.sparse_tensor_index_column_name_template = (
+                sparse_tensor_index_column_name_template
+            )
+            self.generic_sparse_tensor_names = generic_sparse_tensor_names
 
     def __init__(
-        self,
-        sparse_tensor_value_column_name_template: str = "{tensor_name}$values",
-        sparse_tensor_index_column_name_template:
-        str = "{tensor_name}$index{index}",
-        generic_sparse_tensor_names: FrozenSet[str] = frozenset()):
-      """Initialzier.
-
-      Args:
-        sparse_tensor_value_column_name_template: a `str.format()` template
-          for the column name for the values component of a generic
-          SparseTensor. This template should contain a "{tensor_name}" token.
-        sparse_tensor_index_column_name_template: a `str.format()` template
-          for the column name for the sparse index components of a generic
-          SparseTensor. This template should contain a "{tensor_name}" token
-          and an "{index}" token.
-        generic_sparse_tensor_names: a set of SparseTensor names that must be
-          converted as generic SparseTensors. Its purpose is to disambiguate
-          2-D varlen and 2-D generic SparseTensors. It is not necessary to
-          include names of >2-D SparseTensors since they can only be handled as
-          generic SparseTensors.
-      """
-      self.sparse_tensor_value_column_name_template = (
-          sparse_tensor_value_column_name_template)
-      self.sparse_tensor_index_column_name_template = (
-          sparse_tensor_index_column_name_template)
-      self.generic_sparse_tensor_names = generic_sparse_tensor_names
-
-  def __init__(self,
-               type_specs: Dict[str, tf.TypeSpec],
-               options: Options = Options()):
-    """Initializer.
-
-    Args:
-      type_specs: a mapping from names of tensors to their TypeSpecs. When
-        calling convert(), the dict of tensors passed in must contain the
-        same names, and each TensorAlike must be compatible to their TypeSpecs.
-      options: options.
-    """
-    self._handlers = _make_handlers(type_specs, options)
-    all_fields = []
-    seen_column_names = set()
-    for tensor_name, handler in self._handlers:
-      for f in handler.arrow_fields():
-        if f.name in seen_column_names:
-          raise ValueError("Handler for tensor {} produces a column of a "
-                           "conflicting name: {}".format(tensor_name, f.name))
-        seen_column_names.add(f.name)
-        all_fields.append(f)
-    self._arrow_schema = pa.schema(all_fields)
-
-  def arrow_schema(self) -> pa.Schema:
-    """Returns the schema of the RecordBatch output by convert()."""
-    return self._arrow_schema
-
-  def tensor_representations(
-      self) -> Dict[str, schema_pb2.TensorRepresentation]:
-    """Returns the TensorRepresentations for each TensorAlike.
-
-    The TypeSpecs of those TensorAlikes are specified in the initializer.
-    These TensorRepresentations, along with the schema returned by
-    arrow_schema() comprises all the information needed to turn the
-    RecordBatches produced by convert() back to TensorAlikes.
-
-    Returns:
-      a dict mapping tensor names to their TensorRepresentations.
-    """
-    return {
-        tensor_name: handler.tensor_representation()
-        for tensor_name, handler in self._handlers
-    }
-
-  def convert(self, tensors: Dict[str,
-                                  common_types.TensorAlike]) -> pa.RecordBatch:
-    """Converts a dict of tensors to a RecordBatch.
-
-    Args:
-      tensors: must contain the same keys as the dict passed to the initialier.
-        and each TensorAlike must be compatible with the corresponding TypeSpec.
-
-    Returns:
-      a RecordBatch, whose schema equals to self.arrow_schema().
-    """
-    assert len(self._handlers) == len(tensors)
-    arrays = []
-    for tensor_name, handler in self._handlers:
-      try:
-        arrays.extend(handler.convert(tensors[tensor_name]))
-      except Exception as e:
-        # Reraise the same exception with a extra information.
-        dbg_info = (f"tensor '{tensor_name}': {tensors[tensor_name]}\n"
-                    f"a type_spec of {handler._type_spec}")  # pylint: disable=protected-access.
-        e.args = (dbg_info,) + e.args
-        raise e.with_traceback(e.__traceback__)
-
-    return pa.record_batch(arrays, schema=self._arrow_schema)
+        self, type_specs: Dict[str, tf.TypeSpec], options: Options = Options()
+    ):
+        """Initializer.
+
+        Args:
+        ----
+          type_specs: a mapping from names of tensors to their TypeSpecs. When
+            calling convert(), the dict of tensors passed in must contain the
+            same names, and each TensorAlike must be compatible to their TypeSpecs.
+          options: options.
+        """
+        self._handlers = _make_handlers(type_specs, options)
+        all_fields = []
+        seen_column_names = set()
+        for tensor_name, handler in self._handlers:
+            for f in handler.arrow_fields():
+                if f.name in seen_column_names:
+                    raise ValueError(
+                        f"Handler for tensor {tensor_name} produces a column of a "
+                        f"conflicting name: {f.name}"
+                    )
+                seen_column_names.add(f.name)
+                all_fields.append(f)
+        self._arrow_schema = pa.schema(all_fields)
+
+    def arrow_schema(self) -> pa.Schema:
+        """Returns the schema of the RecordBatch output by convert()."""
+        return self._arrow_schema
+
+    def tensor_representations(self) -> Dict[str, schema_pb2.TensorRepresentation]:
+        """Returns the TensorRepresentations for each TensorAlike.
+
+        The TypeSpecs of those TensorAlikes are specified in the initializer.
+        These TensorRepresentations, along with the schema returned by
+        arrow_schema() comprises all the information needed to turn the
+        RecordBatches produced by convert() back to TensorAlikes.
+
+        Returns
+        -------
+          a dict mapping tensor names to their TensorRepresentations.
+        """
+        return {
+            tensor_name: handler.tensor_representation()
+            for tensor_name, handler in self._handlers
+        }
+
+    def convert(self, tensors: Dict[str, common_types.TensorAlike]) -> pa.RecordBatch:
+        """Converts a dict of tensors to a RecordBatch.
+
+        Args:
+        ----
+          tensors: must contain the same keys as the dict passed to the initialier.
+            and each TensorAlike must be compatible with the corresponding TypeSpec.
+
+        Returns:
+        -------
+          a RecordBatch, whose schema equals to self.arrow_schema().
+        """
+        assert len(self._handlers) == len(tensors)
+        arrays = []
+        for tensor_name, handler in self._handlers:
+            try:
+                arrays.extend(handler.convert(tensors[tensor_name]))
+            except Exception as e:
+                # Reraise the same exception with a extra information.
+                dbg_info = (
+                    f"tensor '{tensor_name}': {tensors[tensor_name]}\n"
+                    f"a type_spec of {handler._type_spec}"
+                )  # pylint: disable=protected-access.
+                e.args = (dbg_info,) + e.args
+                raise e.with_traceback(e.__traceback__)
+
+        return pa.record_batch(arrays, schema=self._arrow_schema)
 
 
 class _TypeHandler(abc.ABC):
-  """Interface of a type handler that converts a tensor to arrow arrays.
+    """Interface of a type handler that converts a tensor to arrow arrays.
 
-  Note that a handler may convert a Tensor to multiple pa.Arrays. See
-  arrow_fields().
-  """
+    Note that a handler may convert a Tensor to multiple pa.Arrays. See
+    arrow_fields().
+    """
 
-  __slots__ = ["_tensor_name", "_type_spec"]
+    __slots__ = ["_tensor_name", "_type_spec"]
 
-  def __init__(self, tensor_name: str, type_spec: tf.TypeSpec):
-    self._tensor_name = tensor_name
-    self._type_spec = type_spec
+    def __init__(self, tensor_name: str, type_spec: tf.TypeSpec):
+        self._tensor_name = tensor_name
+        self._type_spec = type_spec
 
-  def convert(self, tensor: common_types.TensorAlike) -> List[pa.Array]:
-    """Converts the given TensorAlike to pa.Arrays after validating its spec."""
-    actual_spec = tf.type_spec_from_value(tensor)
-    if not self._type_spec.is_compatible_with(actual_spec):
-      raise TypeError("Expected {} but got {}".format(self._type_spec,
-                                                      actual_spec))
-    return self._convert_internal(tensor)
+    def convert(self, tensor: common_types.TensorAlike) -> List[pa.Array]:
+        """Converts the given TensorAlike to pa.Arrays after validating its spec."""
+        actual_spec = tf.type_spec_from_value(tensor)
+        if not self._type_spec.is_compatible_with(actual_spec):
+            raise TypeError(f"Expected {self._type_spec} but got {actual_spec}")
+        return self._convert_internal(tensor)
 
-  @abc.abstractmethod
-  def arrow_fields(self) -> List[pa.Field]:
-    """Returns the name and type (in a pa.Field) of result pa.Arrays.
+    @abc.abstractmethod
+    def arrow_fields(self) -> List[pa.Field]:
+        """Returns the name and type (in a pa.Field) of result pa.Arrays.
 
-    Note that a Handler can convert a Tensor to multiple pa.Arrays. It must
-    make sure _convert_internal() returns those Arrays of the types declared
-    here, in the correct order.
-    """
+        Note that a Handler can convert a Tensor to multiple pa.Arrays. It must
+        make sure _convert_internal() returns those Arrays of the types declared
+        here, in the correct order.
+        """
 
-  @abc.abstractmethod
-  def _convert_internal(self,
-                        tensor: common_types.TensorAlike) -> List[pa.Array]:
-    """Converts the given TensorAlike to a list of pa.Arrays.
+    @abc.abstractmethod
+    def _convert_internal(self, tensor: common_types.TensorAlike) -> List[pa.Array]:
+        """Converts the given TensorAlike to a list of pa.Arrays.
 
-    Each element in the list should correspond to one in `arrow_fields()`.
+        Each element in the list should correspond to one in `arrow_fields()`.
 
-    Args:
-      tensor: the TensorAlike to be converted.
-    """
+        Args:
+        ----
+          tensor: the TensorAlike to be converted.
+        """
 
-  @abc.abstractmethod
-  def tensor_representation(self) -> schema_pb2.TensorRepresentation:
-    """Returns the TensorRepresentation.
+    @abc.abstractmethod
+    def tensor_representation(self) -> schema_pb2.TensorRepresentation:
+        """Returns the TensorRepresentation.
 
-    The TensorRepresentation, along with `arrow_fields()` can be used to
-    convert from pa.Arrays back to Tensors.
-    """
+        The TensorRepresentation, along with `arrow_fields()` can be used to
+        convert from pa.Arrays back to Tensors.
+        """
 
-  @staticmethod
-  @abc.abstractmethod
-  def can_handle(tensor_name: str, type_spec: tf.TypeSpec,
-                 options: TensorsToRecordBatchConverter.Options) -> bool:
-    """Returns `True` if the handler can handle the given `tf.TypeSpec`."""  # pytype: disable=bad-return-type
+    @staticmethod
+    @abc.abstractmethod
+    def can_handle(
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ) -> bool:
+        """Returns `True` if the handler can handle the given `tf.TypeSpec`."""  # pytype: disable=bad-return-type
 
 
 class _DenseTensorHandler(_TypeHandler):
-  """Handles Dense Tensors of known shape (except for the batch dim)."""
-
-  __slots__ = ["_values_arrow_type", "_unbatched_shape"]
-
-  def __init__(self, tensor_name: str, type_spec: tf.TypeSpec,
-               options: TensorsToRecordBatchConverter.Options):
-    del options
-    super().__init__(tensor_name, type_spec)
-    self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec.dtype)
-    self._unbatched_shape = type_spec.shape.as_list()[1:]
+    """Handles Dense Tensors of known shape (except for the batch dim)."""
 
-  def arrow_fields(self) -> List[pa.Field]:
-    return [
-        pa.field(self._tensor_name,
-                 pa.large_list(_tf_dtype_to_arrow_type(self._type_spec.dtype)))
-    ]
+    __slots__ = ["_values_arrow_type", "_unbatched_shape"]
 
-  def tensor_representation(self) -> schema_pb2.TensorRepresentation:
-    result = schema_pb2.TensorRepresentation()
-    result.dense_tensor.column_name = self._tensor_name
-    for d in self._unbatched_shape:
-      result.dense_tensor.shape.dim.add().size = d
-    return result
-
-  def _convert_internal(self,
-                        tensor: common_types.TensorAlike) -> List[pa.Array]:
-    assert isinstance(tensor, (tf.Tensor, np.ndarray)), type(tensor)
-    values_np = np.asarray(tensor)
-    shape = values_np.shape
-    elements_per_list = np.prod(shape[1:], dtype=np.int64)
-    if elements_per_list == 0:
-      offsets = np.zeros(shape[0] + 1, dtype=np.int64)
-    else:
-      offsets = np.arange(
-          0,
-          elements_per_list * shape[0] + 1,
-          elements_per_list,
-          dtype=np.int64)
-    values_np = np.reshape(values_np, -1)
-    return [pa.LargeListArray.from_arrays(offsets, pa.array(
-        values_np, self._values_arrow_type))]
-
-  @staticmethod
-  def can_handle(tensor_name: str, type_spec: tf.TypeSpec,
-                 options: TensorsToRecordBatchConverter.Options) -> bool:
-    del tensor_name
-    del options
-    if not isinstance(type_spec, tf.TensorSpec):
-      return False
-    if type_spec.dtype == tf.bool:
-      return False
-    # Can only handle batched tensor (at least 1-D).
-    if type_spec.shape.rank is None or type_spec.shape.rank <= 0:
-      return False
-    shape = type_spec.shape.as_list()
-    # Can only handle batched tensor (the batch size should be flexible).
-    if shape[0] is not None:
-      return False
-    return all(d is not None for d in shape[1:])
+    def __init__(
+        self,
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ):
+        del options
+        super().__init__(tensor_name, type_spec)
+        self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec.dtype)
+        self._unbatched_shape = type_spec.shape.as_list()[1:]
+
+    def arrow_fields(self) -> List[pa.Field]:
+        return [
+            pa.field(
+                self._tensor_name,
+                pa.large_list(_tf_dtype_to_arrow_type(self._type_spec.dtype)),
+            )
+        ]
+
+    def tensor_representation(self) -> schema_pb2.TensorRepresentation:
+        result = schema_pb2.TensorRepresentation()
+        result.dense_tensor.column_name = self._tensor_name
+        for d in self._unbatched_shape:
+            result.dense_tensor.shape.dim.add().size = d
+        return result
+
+    def _convert_internal(self, tensor: common_types.TensorAlike) -> List[pa.Array]:
+        assert isinstance(tensor, (tf.Tensor, np.ndarray)), type(tensor)
+        values_np = np.asarray(tensor)
+        shape = values_np.shape
+        elements_per_list = np.prod(shape[1:], dtype=np.int64)
+        if elements_per_list == 0:
+            offsets = np.zeros(shape[0] + 1, dtype=np.int64)
+        else:
+            offsets = np.arange(
+                0, elements_per_list * shape[0] + 1, elements_per_list, dtype=np.int64
+            )
+        values_np = np.reshape(values_np, -1)
+        return [
+            pa.LargeListArray.from_arrays(
+                offsets, pa.array(values_np, self._values_arrow_type)
+            )
+        ]
+
+    @staticmethod
+    def can_handle(
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ) -> bool:
+        del tensor_name
+        del options
+        if not isinstance(type_spec, tf.TensorSpec):
+            return False
+        if type_spec.dtype == tf.bool:
+            return False
+        # Can only handle batched tensor (at least 1-D).
+        if type_spec.shape.rank is None or type_spec.shape.rank <= 0:
+            return False
+        shape = type_spec.shape.as_list()
+        # Can only handle batched tensor (the batch size should be flexible).
+        if shape[0] is not None:
+            return False
+        return all(d is not None for d in shape[1:])
 
 
 class _VarLenSparseTensorHandler(_TypeHandler):
-  """Handles 2-D var-len (ragged) sparse tensor."""
-
-  __slots__ = ["_values_arrow_type"]
-
-  def __init__(self, tensor_name: str, type_spec: tf.TypeSpec,
-               options: TensorsToRecordBatchConverter.Options):
-    del options
-    super().__init__(tensor_name, type_spec)
-    self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec.dtype)
-
-  def _convert_internal(self,
-                        tensor: common_types.TensorAlike) -> List[pa.Array]:
-    # Algorithm:
-    # Assume:
-    #   - the COO indices are sorted (partially checked below)
-    #   - the SparseTensor is 2-D (checked in can_handle())
-    #   - the SparseTensor is ragged
-    # Then the first dim of those COO indices contains "parent indices":
-    # parent_index[i] == j means i-th value belong to j-th sub list.
-    # Then we have a C++ util to convert parent indices + values to a ListArray.
-    #
-    # Note that the resulting ListArray doesn't explicitly store the second
-    # dense dimension. When it is converted back to SparseTensor with
-    # tensor_adapter the second dense dimension is recovered as an upper bound
-    # for second indices + 1. Therefore, if SparseTensor's second dense
-    # dimension is not tight, then the composition
-    # TensorAdapter(TensorsToRecordBatchConverter()) is not an identity.
-    dense_shape = np.asarray(tensor.dense_shape)
-    indices = np.asarray(tensor.indices)
-    parent_indices = indices[:, 0]
-    assert np.min(np.diff(parent_indices), initial=0) >= 0, (
-        "The sparse indices must be sorted")
-    return [
-        array_util.MakeListArrayFromParentIndicesAndValues(
-            dense_shape[0],
-            pa.array(parent_indices, type=pa.int64()),
-            pa.array(np.asarray(tensor.values), type=self._values_arrow_type),
-            empty_list_as_null=False)
-    ]
+    """Handles 2-D var-len (ragged) sparse tensor."""
 
-  def arrow_fields(self) -> List[pa.Field]:
-    return [
-        pa.field(self._tensor_name,
-                 pa.large_list(_tf_dtype_to_arrow_type(self._type_spec.dtype)))
-    ]
+    __slots__ = ["_values_arrow_type"]
 
-  def tensor_representation(self) -> schema_pb2.TensorRepresentation:
-    result = schema_pb2.TensorRepresentation()
-    result.varlen_sparse_tensor.column_name = self._tensor_name
-    return result
-
-  @staticmethod
-  def can_handle(tensor_name: str, type_spec: tf.TypeSpec,
-                 options: TensorsToRecordBatchConverter.Options) -> bool:
-    if not isinstance(type_spec, tf.SparseTensorSpec):
-      return False
-    return (type_spec.shape.is_compatible_with([None, None]) and
-            type_spec.dtype != tf.bool and
-            tensor_name not in options.generic_sparse_tensor_names)
+    def __init__(
+        self,
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ):
+        del options
+        super().__init__(tensor_name, type_spec)
+        self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec.dtype)
+
+    def _convert_internal(self, tensor: common_types.TensorAlike) -> List[pa.Array]:
+        # Algorithm:
+        # Assume:
+        #   - the COO indices are sorted (partially checked below)
+        #   - the SparseTensor is 2-D (checked in can_handle())
+        #   - the SparseTensor is ragged
+        # Then the first dim of those COO indices contains "parent indices":
+        # parent_index[i] == j means i-th value belong to j-th sub list.
+        # Then we have a C++ util to convert parent indices + values to a ListArray.
+        #
+        # Note that the resulting ListArray doesn't explicitly store the second
+        # dense dimension. When it is converted back to SparseTensor with
+        # tensor_adapter the second dense dimension is recovered as an upper bound
+        # for second indices + 1. Therefore, if SparseTensor's second dense
+        # dimension is not tight, then the composition
+        # TensorAdapter(TensorsToRecordBatchConverter()) is not an identity.
+        dense_shape = np.asarray(tensor.dense_shape)
+        indices = np.asarray(tensor.indices)
+        parent_indices = indices[:, 0]
+        assert (
+            np.min(np.diff(parent_indices), initial=0) >= 0
+        ), "The sparse indices must be sorted"
+        return [
+            array_util.MakeListArrayFromParentIndicesAndValues(
+                dense_shape[0],
+                pa.array(parent_indices, type=pa.int64()),
+                pa.array(np.asarray(tensor.values), type=self._values_arrow_type),
+                empty_list_as_null=False,
+            )
+        ]
+
+    def arrow_fields(self) -> List[pa.Field]:
+        return [
+            pa.field(
+                self._tensor_name,
+                pa.large_list(_tf_dtype_to_arrow_type(self._type_spec.dtype)),
+            )
+        ]
+
+    def tensor_representation(self) -> schema_pb2.TensorRepresentation:
+        result = schema_pb2.TensorRepresentation()
+        result.varlen_sparse_tensor.column_name = self._tensor_name
+        return result
+
+    @staticmethod
+    def can_handle(
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ) -> bool:
+        if not isinstance(type_spec, tf.SparseTensorSpec):
+            return False
+        return (
+            type_spec.shape.is_compatible_with([None, None])
+            and type_spec.dtype != tf.bool
+            and tensor_name not in options.generic_sparse_tensor_names
+        )
 
 
 class _RaggedTensorHandler(_TypeHandler):
-  """Handles ragged tensor."""
-
-  __slots__ = ["_values_arrow_type", "_row_partition_dtype", "_unbatched_shape"]
-
-  def __init__(self, tensor_name: str, type_spec: tf.TypeSpec,
-               options: TensorsToRecordBatchConverter.Options):
-    del options
-    super().__init__(tensor_name, type_spec)
-
-    # TODO(b/159717195): clean up protected-access
-    self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec._dtype)  # pylint: disable=protected-access
-    self._row_partition_dtype = type_spec._row_splits_dtype  # pylint: disable=protected-access
-    self._unbatched_shape = type_spec._shape.as_list()[1:]  # pylint: disable=protected-access
-
-  def _convert_internal(self,
-                        tensor: common_types.TensorAlike) -> List[pa.Array]:
-    # Unnest all outer ragged dimensions keeping the offsets.
-    nested_offsets = []
-    while isinstance(tensor,
-                     (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)):
-      nested_offsets.append(np.asarray(tensor.row_splits))
-      tensor = tensor.values
-
-    # Calculate the number of inner uniform dimension elements per one first
-    # ragged dimension element.
-    inner_dimension_elements = np.prod(tensor.shape[1:], dtype=np.int64)
-
-    result = pa.array(np.ravel(tensor), self._values_arrow_type)
-    # Nest values. The innermost sequence of offsets must be adjusted by the
-    # number of uniform dimension elements.
-    nested_offsets_iter = reversed(nested_offsets)
-    result = pa.LargeListArray.from_arrays(
-        offsets=next(nested_offsets_iter) * inner_dimension_elements,
-        values=result)
-    for offsets in nested_offsets_iter:
-      result = pa.LargeListArray.from_arrays(offsets=offsets, values=result)
-
-    return [result]
-
-  def arrow_fields(self) -> List[pa.Field]:
-    # TODO(b/159717195): clean up protected-access
-    arrow_type = _tf_dtype_to_arrow_type(self._type_spec._dtype)  # pylint: disable=protected-access
-    for _ in range(self._type_spec._ragged_rank):  # pylint:disable=protected-access
-      arrow_type = pa.large_list(arrow_type)
-    return [
-        pa.field(self._tensor_name, arrow_type)
-    ]
+    """Handles ragged tensor."""
 
-  def tensor_representation(self) -> schema_pb2.TensorRepresentation:
-    result = schema_pb2.TensorRepresentation()
-    result.ragged_tensor.feature_path.step.append(self._tensor_name)
-    row_partition_dtype = (
-        schema_pb2.TensorRepresentation.RowPartitionDType.INT32
-        if self._row_partition_dtype == tf.int32 else
-        schema_pb2.TensorRepresentation.RowPartitionDType.INT64)
-    result.ragged_tensor.row_partition_dtype = row_partition_dtype
-    for dim in self._unbatched_shape:
-      # Create uniform_row_length partitions only.
-      if dim is not None:
-        result.ragged_tensor.partition.append(
-            schema_pb2.TensorRepresentation.RaggedTensor.Partition(
-                uniform_row_length=dim))
-    return result
-
-  @staticmethod
-  def can_handle(tensor_name: str, type_spec: tf.TypeSpec,
-                 options: TensorsToRecordBatchConverter.Options) -> bool:
-    del tensor_name
-    del options
-    if not isinstance(type_spec, tf.RaggedTensorSpec):
-      return False
-    # TODO(b/159717195): clean up protected-access
-    # pylint:disable=protected-access
-    if type_spec._ragged_rank < 1:
-      # We don't support RaggedTensors that are not ragged. They are
-      # essentially dense tensors and should be converted to them and be
-      # handled by the DenseTensorHandler.
-      return False
-    shape = type_spec._shape.as_list()
-    if (any(shape[1:type_spec._ragged_rank + 1]) or
-        not all(shape[type_spec._ragged_rank + 1:])):
-      # We only support inner uniform dimensions.
-      return False
-    return type_spec._dtype != tf.bool
-    # pylint:enable=protected-access
+    __slots__ = ["_values_arrow_type", "_row_partition_dtype", "_unbatched_shape"]
+
+    def __init__(
+        self,
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ):
+        del options
+        super().__init__(tensor_name, type_spec)
+
+        # TODO(b/159717195): clean up protected-access
+        self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec._dtype)  # pylint: disable=protected-access
+        self._row_partition_dtype = type_spec._row_splits_dtype  # pylint: disable=protected-access
+        self._unbatched_shape = type_spec._shape.as_list()[1:]  # pylint: disable=protected-access
+
+    def _convert_internal(self, tensor: common_types.TensorAlike) -> List[pa.Array]:
+        # Unnest all outer ragged dimensions keeping the offsets.
+        nested_offsets = []
+        while isinstance(
+            tensor, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)
+        ):
+            nested_offsets.append(np.asarray(tensor.row_splits))
+            tensor = tensor.values
+
+        # Calculate the number of inner uniform dimension elements per one first
+        # ragged dimension element.
+        inner_dimension_elements = np.prod(tensor.shape[1:], dtype=np.int64)
+
+        result = pa.array(np.ravel(tensor), self._values_arrow_type)
+        # Nest values. The innermost sequence of offsets must be adjusted by the
+        # number of uniform dimension elements.
+        nested_offsets_iter = reversed(nested_offsets)
+        result = pa.LargeListArray.from_arrays(
+            offsets=next(nested_offsets_iter) * inner_dimension_elements, values=result
+        )
+        for offsets in nested_offsets_iter:
+            result = pa.LargeListArray.from_arrays(offsets=offsets, values=result)
+
+        return [result]
+
+    def arrow_fields(self) -> List[pa.Field]:
+        # TODO(b/159717195): clean up protected-access
+        arrow_type = _tf_dtype_to_arrow_type(self._type_spec._dtype)  # pylint: disable=protected-access
+        for _ in range(self._type_spec._ragged_rank):  # pylint:disable=protected-access
+            arrow_type = pa.large_list(arrow_type)
+        return [pa.field(self._tensor_name, arrow_type)]
+
+    def tensor_representation(self) -> schema_pb2.TensorRepresentation:
+        result = schema_pb2.TensorRepresentation()
+        result.ragged_tensor.feature_path.step.append(self._tensor_name)
+        row_partition_dtype = (
+            schema_pb2.TensorRepresentation.RowPartitionDType.INT32
+            if self._row_partition_dtype == tf.int32
+            else schema_pb2.TensorRepresentation.RowPartitionDType.INT64
+        )
+        result.ragged_tensor.row_partition_dtype = row_partition_dtype
+        for dim in self._unbatched_shape:
+            # Create uniform_row_length partitions only.
+            if dim is not None:
+                result.ragged_tensor.partition.append(
+                    schema_pb2.TensorRepresentation.RaggedTensor.Partition(
+                        uniform_row_length=dim
+                    )
+                )
+        return result
+
+    @staticmethod
+    def can_handle(
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ) -> bool:
+        del tensor_name
+        del options
+        if not isinstance(type_spec, tf.RaggedTensorSpec):
+            return False
+        # TODO(b/159717195): clean up protected-access
+        # pylint:disable=protected-access
+        if type_spec._ragged_rank < 1:
+            # We don't support RaggedTensors that are not ragged. They are
+            # essentially dense tensors and should be converted to them and be
+            # handled by the DenseTensorHandler.
+            return False
+        shape = type_spec._shape.as_list()
+        if any(shape[1 : type_spec._ragged_rank + 1]) or not all(
+            shape[type_spec._ragged_rank + 1 :]
+        ):
+            # We only support inner uniform dimensions.
+            return False
+        return type_spec._dtype != tf.bool
+        # pylint:enable=protected-access
 
 
 class _SparseTensorHandler(_TypeHandler):
-  """Handles generic SparseTensor.
-
-  Note that this handler does not handle any 2-D / 1-D SparseTensor
-  by default (they are handled by _VarLenSparseTensorHandler). However, not all
-  2-D SparseTensors are VarLenSparseTensors, if you want to handle specific 2-D
-  SparseTensor as a generic SparseTensor, add its name to
-  options.generic_sparse_tensor_names.
-  """
-
-  __slots__ = ["_values_arrow_type", "_unbatched_shape",
-               "_value_column_name", "_index_column_names"]
-
-  def __init__(self, tensor_name: str, type_spec: tf.TypeSpec,
-               options: TensorsToRecordBatchConverter.Options):
-    super().__init__(tensor_name, type_spec)
-    self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec.dtype)
-    self._unbatched_shape = type_spec.shape.as_list()[1:]
-    self._value_column_name = (
-        options.sparse_tensor_value_column_name_template.format(
-            tensor_name=tensor_name))
-    self._index_column_names = [
-        options.sparse_tensor_index_column_name_template.format(
-            tensor_name=tensor_name, index=i)
-        for i in range(len(self._unbatched_shape))
-    ]
+    """Handles generic SparseTensor.
+
+    Note that this handler does not handle any 2-D / 1-D SparseTensor
+    by default (they are handled by _VarLenSparseTensorHandler). However, not all
+    2-D SparseTensors are VarLenSparseTensors, if you want to handle specific 2-D
+    SparseTensor as a generic SparseTensor, add its name to
+    options.generic_sparse_tensor_names.
+    """
 
-  def _convert_internal(self,
-                        tensor: common_types.TensorAlike) -> List[pa.Array]:
-    # Transpose the indices array (and materialize the result in C-order)
-    # because later we will use individual columns of the original indices.
-    indices_np = (
-        np.ascontiguousarray(
-            np.transpose(np.asarray(tensor.indices)), dtype=np.int64))
-
-    # the first column of indices identifies which row each sparse value belongs
-    # to.
-    parent_indices = pa.array(indices_np[0, :], type=pa.int64())
-    num_rows = int(np.asarray(tensor.dense_shape)[0])
-
-    result = [
-        array_util.MakeListArrayFromParentIndicesAndValues(
-            num_rows,
-            parent_indices,
-            pa.array(np.asarray(tensor.values), type=self._values_arrow_type),
-            empty_list_as_null=False)
+    __slots__ = [
+        "_values_arrow_type",
+        "_unbatched_shape",
+        "_value_column_name",
+        "_index_column_names",
     ]
 
-    for i in range(len(self._index_column_names)):
-      result.append(
-          array_util.MakeListArrayFromParentIndicesAndValues(
-              num_rows,
-              parent_indices,
-              pa.array(indices_np[i + 1, :], type=pa.int64()),
-              empty_list_as_null=False))
-
-    return result
-
-  def arrow_fields(self) -> List[pa.Field]:
-    return ([
-        pa.field(self._value_column_name, pa.large_list(
-            self._values_arrow_type))
-    ] + [
-        pa.field(n, pa.large_list(pa.int64())) for n in self._index_column_names
-    ])
-
-  def tensor_representation(self) -> schema_pb2.TensorRepresentation:
-    result = schema_pb2.TensorRepresentation()
-    for d in self._unbatched_shape:
-      result.sparse_tensor.dense_shape.dim.add().size = -1 if d is None else d
-    result.sparse_tensor.value_column_name = self._value_column_name
-    result.sparse_tensor.index_column_names.extend(self._index_column_names)
-    return result
-
-  @staticmethod
-  def can_handle(tensor_name: str, type_spec: tf.TypeSpec,
-                 options: TensorsToRecordBatchConverter.Options) -> bool:
-    if not isinstance(type_spec, tf.SparseTensorSpec):
-      return False
-    if type_spec.shape.rank is None or type_spec.shape.rank <= 1:
-      return False
-    if (type_spec.shape.rank == 2 and
-        tensor_name not in options.generic_sparse_tensor_names):
-      return False
-    return True
+    def __init__(
+        self,
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ):
+        super().__init__(tensor_name, type_spec)
+        self._values_arrow_type = _tf_dtype_to_arrow_type(type_spec.dtype)
+        self._unbatched_shape = type_spec.shape.as_list()[1:]
+        self._value_column_name = (
+            options.sparse_tensor_value_column_name_template.format(
+                tensor_name=tensor_name
+            )
+        )
+        self._index_column_names = [
+            options.sparse_tensor_index_column_name_template.format(
+                tensor_name=tensor_name, index=i
+            )
+            for i in range(len(self._unbatched_shape))
+        ]
+
+    def _convert_internal(self, tensor: common_types.TensorAlike) -> List[pa.Array]:
+        # Transpose the indices array (and materialize the result in C-order)
+        # because later we will use individual columns of the original indices.
+        indices_np = np.ascontiguousarray(
+            np.transpose(np.asarray(tensor.indices)), dtype=np.int64
+        )
+
+        # the first column of indices identifies which row each sparse value belongs
+        # to.
+        parent_indices = pa.array(indices_np[0, :], type=pa.int64())
+        num_rows = int(np.asarray(tensor.dense_shape)[0])
+
+        result = [
+            array_util.MakeListArrayFromParentIndicesAndValues(
+                num_rows,
+                parent_indices,
+                pa.array(np.asarray(tensor.values), type=self._values_arrow_type),
+                empty_list_as_null=False,
+            )
+        ]
+
+        for i in range(len(self._index_column_names)):
+            result.append(
+                array_util.MakeListArrayFromParentIndicesAndValues(
+                    num_rows,
+                    parent_indices,
+                    pa.array(indices_np[i + 1, :], type=pa.int64()),
+                    empty_list_as_null=False,
+                )
+            )
+
+        return result
+
+    def arrow_fields(self) -> List[pa.Field]:
+        return [
+            pa.field(self._value_column_name, pa.large_list(self._values_arrow_type))
+        ] + [pa.field(n, pa.large_list(pa.int64())) for n in self._index_column_names]
+
+    def tensor_representation(self) -> schema_pb2.TensorRepresentation:
+        result = schema_pb2.TensorRepresentation()
+        for d in self._unbatched_shape:
+            result.sparse_tensor.dense_shape.dim.add().size = -1 if d is None else d
+        result.sparse_tensor.value_column_name = self._value_column_name
+        result.sparse_tensor.index_column_names.extend(self._index_column_names)
+        return result
+
+    @staticmethod
+    def can_handle(
+        tensor_name: str,
+        type_spec: tf.TypeSpec,
+        options: TensorsToRecordBatchConverter.Options,
+    ) -> bool:
+        if not isinstance(type_spec, tf.SparseTensorSpec):
+            return False
+        if type_spec.shape.rank is None or type_spec.shape.rank <= 1:
+            return False
+        if (
+            type_spec.shape.rank == 2
+            and tensor_name not in options.generic_sparse_tensor_names
+        ):
+            return False
+        return True
 
 
 _ALL_HANDLERS_CLS = [
-    _VarLenSparseTensorHandler, _RaggedTensorHandler, _DenseTensorHandler,
+    _VarLenSparseTensorHandler,
+    _RaggedTensorHandler,
+    _DenseTensorHandler,
     _SparseTensorHandler,
 ]
 
 
 def _tf_dtype_to_arrow_type(dtype: tf.DType):
-  """Maps a tf Dtype to an Arrow type."""
-  if dtype == tf.string:
-    return pa.large_binary()
-  elif dtype == tf.bool:
-    raise TypeError("Unable to handle bool tensors -- consider casting it to a "
-                    "tf.uint8")
-  return pa.from_numpy_dtype(dtype.as_numpy_dtype)
+    """Maps a tf Dtype to an Arrow type."""
+    if dtype == tf.string:
+        return pa.large_binary()
+    elif dtype == tf.bool:
+        raise TypeError(
+            "Unable to handle bool tensors -- consider casting it to a " "tf.uint8"
+        )
+    return pa.from_numpy_dtype(dtype.as_numpy_dtype)
 
 
 def _make_handlers(
-    type_specs: Dict[str, tf.TypeSpec],
-    options: TensorsToRecordBatchConverter.Options
+    type_specs: Dict[str, tf.TypeSpec], options: TensorsToRecordBatchConverter.Options
 ) -> List[Tuple[str, _TypeHandler]]:
-  return [(tensor_name, _get_handler(tensor_name, type_spec, options))
-          for tensor_name, type_spec in sorted(type_specs.items())]
+    return [
+        (tensor_name, _get_handler(tensor_name, type_spec, options))
+        for tensor_name, type_spec in sorted(type_specs.items())
+    ]
 
 
 def _get_handler(
-    tensor_name: str, type_spec: tf.TypeSpec,
-    options: TensorsToRecordBatchConverter.Options) -> _TypeHandler:
-  """Returns a TypeHandler that can handle `type_spec`."""
-  for handler_cls in _ALL_HANDLERS_CLS:
-    if handler_cls.can_handle(tensor_name, type_spec, options):
-      return handler_cls(tensor_name, type_spec, options)
-  # We don't support tf.bool now because:
-  #   - if converted to pa.bool(), TFDV does not know how to handle it.
-  #   - if converted to pa.uint8() (or other integral types), we don't have
-  #     a place to note it was previously a tf.bool so TensorAdapter can
-  #     revert it as a tf.bool.
-  raise ValueError(
-      "No handler found for tensor {} of spec {}. "
-      "Note that tensors with dtype == tf.bool cannot be handled in general -- "
-      "consider casting them to tf.uint8."
-      .format(tensor_name, type_spec))
+    tensor_name: str,
+    type_spec: tf.TypeSpec,
+    options: TensorsToRecordBatchConverter.Options,
+) -> _TypeHandler:
+    """Returns a TypeHandler that can handle `type_spec`."""
+    for handler_cls in _ALL_HANDLERS_CLS:
+        if handler_cls.can_handle(tensor_name, type_spec, options):
+            return handler_cls(tensor_name, type_spec, options)
+    # We don't support tf.bool now because:
+    #   - if converted to pa.bool(), TFDV does not know how to handle it.
+    #   - if converted to pa.uint8() (or other integral types), we don't have
+    #     a place to note it was previously a tf.bool so TensorAdapter can
+    #     revert it as a tf.bool.
+    raise ValueError(
+        f"No handler found for tensor {tensor_name} of spec {type_spec}. "
+        "Note that tensors with dtype == tf.bool cannot be handled in general -- "
+        "consider casting them to tf.uint8."
+    )
diff --git a/tfx_bsl/tfxio/tensor_to_arrow_test.py b/tfx_bsl/tfxio/tensor_to_arrow_test.py
index c83f8c6b..22cfcc08 100644
--- a/tfx_bsl/tfxio/tensor_to_arrow_test.py
+++ b/tfx_bsl/tfxio/tensor_to_arrow_test.py
@@ -16,13 +16,11 @@
 import numpy as np
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.tfxio import tensor_adapter
-from tfx_bsl.tfxio import tensor_to_arrow
-from tfx_bsl.tfxio import test_case
-
 from google.protobuf import text_format
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.tfxio import tensor_adapter, tensor_to_arrow, test_case
+
 _TF_TYPE_TO_ARROW_TYPE = {
     tf.int8: pa.int8(),
     tf.int16: pa.int16(),
@@ -41,85 +39,91 @@
 
 
 def _make_2d_dense_tensor_test_cases():
-  result = []
-  for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
-    if tf_type == tf.string:
-      tensor = tf.constant([[b"1", b"2"], [b"3", b"4"]], dtype=tf.string)
-      expected_array = pa.array([[b"1", b"2"], [b"3", b"4"]],
-                                type=pa.large_list(arrow_type))
-    else:
-      tensor = tf.constant([[1, 2], [3, 4]], dtype=tf_type)
-      expected_array = pa.array([[1, 2], [3, 4]],
-                                type=pa.large_list(arrow_type))
-    result.append(
-        dict(
-            testcase_name="2d_dense_tensor_%s" % tf_type.name,
-            type_specs={"dt": tf.TensorSpec([None, 2], tf_type)},
-            expected_schema={"dt": pa.large_list(arrow_type)},
-            expected_tensor_representations={
-                "dt":
-                    """dense_tensor {
+    result = []
+    for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
+        if tf_type == tf.string:
+            tensor = tf.constant([[b"1", b"2"], [b"3", b"4"]], dtype=tf.string)
+            expected_array = pa.array(
+                [[b"1", b"2"], [b"3", b"4"]], type=pa.large_list(arrow_type)
+            )
+        else:
+            tensor = tf.constant([[1, 2], [3, 4]], dtype=tf_type)
+            expected_array = pa.array([[1, 2], [3, 4]], type=pa.large_list(arrow_type))
+        result.append(
+            dict(
+                testcase_name="2d_dense_tensor_%s" % tf_type.name,
+                type_specs={"dt": tf.TensorSpec([None, 2], tf_type)},
+                expected_schema={"dt": pa.large_list(arrow_type)},
+                expected_tensor_representations={
+                    "dt": """dense_tensor {
                          column_name: "dt"
                          shape { dim { size: 2} }
                        }""",
-            },
-            tensor_input={"dt": tensor},
-            expected_record_batch={"dt": expected_array}))
-  return result
+                },
+                tensor_input={"dt": tensor},
+                expected_record_batch={"dt": expected_array},
+            )
+        )
+    return result
 
 
 def _make_2d_varlen_sparse_tensor_test_cases():
-  result = []
-  for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
-    if tf_type == tf.string:
-      values = tf.constant([b"1", b"2", b"3"], dtype=tf.string)
-      expected_array = pa.array([[b"1"], [], [b"2", b"3"], []],
-                                type=pa.large_list(arrow_type))
-    else:
-      values = tf.constant([1, 2, 3], dtype=tf_type)
-      expected_array = pa.array([[1], [], [2, 3], []],
-                                type=pa.large_list(arrow_type))
-    result.append(
-        dict(
-            testcase_name="2d_varlen_sparse_tensor_%s" % tf_type.name,
-            type_specs={"sp1": tf.SparseTensorSpec([None, None], tf_type)},
-            expected_schema={"sp1": pa.large_list(arrow_type)},
-            expected_tensor_representations={
-                "sp1": """varlen_sparse_tensor { column_name: "sp1" }""",
-            },
-            tensor_input={
-                "sp1":
-                    tf.SparseTensor(
+    result = []
+    for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
+        if tf_type == tf.string:
+            values = tf.constant([b"1", b"2", b"3"], dtype=tf.string)
+            expected_array = pa.array(
+                [[b"1"], [], [b"2", b"3"], []], type=pa.large_list(arrow_type)
+            )
+        else:
+            values = tf.constant([1, 2, 3], dtype=tf_type)
+            expected_array = pa.array(
+                [[1], [], [2, 3], []], type=pa.large_list(arrow_type)
+            )
+        result.append(
+            dict(
+                testcase_name="2d_varlen_sparse_tensor_%s" % tf_type.name,
+                type_specs={"sp1": tf.SparseTensorSpec([None, None], tf_type)},
+                expected_schema={"sp1": pa.large_list(arrow_type)},
+                expected_tensor_representations={
+                    "sp1": """varlen_sparse_tensor { column_name: "sp1" }""",
+                },
+                tensor_input={
+                    "sp1": tf.SparseTensor(
                         values=values,
                         indices=[[0, 0], [2, 0], [2, 1]],
-                        dense_shape=[4, 2]),
-            },
-            expected_record_batch={"sp1": expected_array}))
-  return result
+                        dense_shape=[4, 2],
+                    ),
+                },
+                expected_record_batch={"sp1": expected_array},
+            )
+        )
+    return result
 
 
 def _make_2d_generic_sparse_tensor_test_cases():
-  result = []
-  for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
-    if tf_type == tf.string:
-      values = tf.constant([b"1", b"2", b"3", b"4"], dtype=tf.string)
-      expected_value_array = pa.array([[b"1", b"2", b"3"], [], [b"4"], []],
-                                      type=pa.large_list(arrow_type))
-    else:
-      values = tf.constant([1, 2, 3, 4], dtype=tf_type)
-      expected_value_array = pa.array([[1, 2, 3], [], [4], []],
-                                      type=pa.large_list(arrow_type))
-    result.append(
-        dict(
-            testcase_name="2d_generic_sparse_tensor_%s" % tf_type.name,
-            type_specs={"sp1": tf.SparseTensorSpec([None, 5], tf_type)},
-            expected_schema={
-                "sp1$values": pa.large_list(arrow_type),
-                "sp1$index0": pa.large_list(pa.int64()),
-            },
-            expected_tensor_representations={
-                "sp1":
-                    """sparse_tensor {
+    result = []
+    for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
+        if tf_type == tf.string:
+            values = tf.constant([b"1", b"2", b"3", b"4"], dtype=tf.string)
+            expected_value_array = pa.array(
+                [[b"1", b"2", b"3"], [], [b"4"], []], type=pa.large_list(arrow_type)
+            )
+        else:
+            values = tf.constant([1, 2, 3, 4], dtype=tf_type)
+            expected_value_array = pa.array(
+                [[1, 2, 3], [], [4], []], type=pa.large_list(arrow_type)
+            )
+        result.append(
+            dict(
+                testcase_name="2d_generic_sparse_tensor_%s" % tf_type.name,
+                type_specs={"sp1": tf.SparseTensorSpec([None, 5], tf_type)},
+                expected_schema={
+                    "sp1$values": pa.large_list(arrow_type),
+                    "sp1$index0": pa.large_list(pa.int64()),
+                },
+                expected_tensor_representations={
+                    "sp1": """sparse_tensor {
                 dense_shape {
                   dim {
                     size: 5
@@ -128,103 +132,118 @@ def _make_2d_generic_sparse_tensor_test_cases():
                 value_column_name: "sp1$values"
                 index_column_names: "sp1$index0"
                 }""",
-            },
-            tensor_input={
-                "sp1":
-                    tf.SparseTensor(
+                },
+                tensor_input={
+                    "sp1": tf.SparseTensor(
                         values=values,
                         indices=[[0, 0], [0, 2], [0, 4], [2, 1]],
-                        dense_shape=[4, 5]),
-            },
-            expected_record_batch={
-                "sp1$values":
-                    expected_value_array,
-                "sp1$index0":
-                    pa.array([[0, 2, 4], [], [1], []],
-                             type=pa.large_list(pa.int64())),
-            },
-            options=tensor_to_arrow.TensorsToRecordBatchConverter.Options(
-                generic_sparse_tensor_names=frozenset({"sp1"}))))
-  return result
+                        dense_shape=[4, 5],
+                    ),
+                },
+                expected_record_batch={
+                    "sp1$values": expected_value_array,
+                    "sp1$index0": pa.array(
+                        [[0, 2, 4], [], [1], []], type=pa.large_list(pa.int64())
+                    ),
+                },
+                options=tensor_to_arrow.TensorsToRecordBatchConverter.Options(
+                    generic_sparse_tensor_names=frozenset({"sp1"})
+                ),
+            )
+        )
+    return result
 
 
 def _make_3d_ragged_tensor_test_cases():
-  result = []
-  for row_partition_dtype in _ROW_PARTITION_DTYPES:
-    row_partition_numpy_type = _ROW_PARTITION_DTYPES[row_partition_dtype]
-    for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
-      if tf_type == tf.string:
-        values = tf.RaggedTensor.from_row_splits(
-            values=tf.constant([b"1", b"2", b"3"], dtype=tf_type),
-            row_splits=np.asarray([0, 1, 1, 3, 3],
-                                  dtype=row_partition_numpy_type))
-        expected_array = pa.array([[[b"1"], [], [b"2", b"3"]], [[]]],
-                                  type=pa.large_list(pa.large_list(arrow_type)))
-      else:
-        values = tf.RaggedTensor.from_row_splits(
-            values=tf.constant([1, 2, 3], dtype=tf_type),
-            row_splits=np.asarray([0, 1, 1, 3, 3],
-                                  dtype=row_partition_numpy_type))
-        expected_array = pa.array([[[1], [], [2, 3]], [[]]],
-                                  type=pa.large_list(pa.large_list(arrow_type)))
-      result.append(
-          dict(
-              testcase_name="3d_ragged_tensor_%s_row_partition_dtype_%s" %
-              (tf_type.name, row_partition_dtype),
-              type_specs={
-                  "sp1":
-                      tf.RaggedTensorSpec(
-                          tf.TensorShape([2, None, None]),
-                          tf_type,
-                          ragged_rank=2,
-                          row_splits_dtype=tf.dtypes.as_dtype(
-                              row_partition_numpy_type))
-              },
-              expected_schema={"sp1": pa.large_list(pa.large_list(arrow_type))},
-              expected_tensor_representations={
-                  "sp1":
-                      """ragged_tensor {
+    result = []
+    for row_partition_dtype in _ROW_PARTITION_DTYPES:
+        row_partition_numpy_type = _ROW_PARTITION_DTYPES[row_partition_dtype]
+        for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
+            if tf_type == tf.string:
+                values = tf.RaggedTensor.from_row_splits(
+                    values=tf.constant([b"1", b"2", b"3"], dtype=tf_type),
+                    row_splits=np.asarray(
+                        [0, 1, 1, 3, 3], dtype=row_partition_numpy_type
+                    ),
+                )
+                expected_array = pa.array(
+                    [[[b"1"], [], [b"2", b"3"]], [[]]],
+                    type=pa.large_list(pa.large_list(arrow_type)),
+                )
+            else:
+                values = tf.RaggedTensor.from_row_splits(
+                    values=tf.constant([1, 2, 3], dtype=tf_type),
+                    row_splits=np.asarray(
+                        [0, 1, 1, 3, 3], dtype=row_partition_numpy_type
+                    ),
+                )
+                expected_array = pa.array(
+                    [[[1], [], [2, 3]], [[]]],
+                    type=pa.large_list(pa.large_list(arrow_type)),
+                )
+            result.append(
+                dict(
+                    testcase_name="3d_ragged_tensor_%s_row_partition_dtype_%s"
+                    % (tf_type.name, row_partition_dtype),
+                    type_specs={
+                        "sp1": tf.RaggedTensorSpec(
+                            tf.TensorShape([2, None, None]),
+                            tf_type,
+                            ragged_rank=2,
+                            row_splits_dtype=tf.dtypes.as_dtype(
+                                row_partition_numpy_type
+                            ),
+                        )
+                    },
+                    expected_schema={"sp1": pa.large_list(pa.large_list(arrow_type))},
+                    expected_tensor_representations={
+                        "sp1": """ragged_tensor {
                           feature_path {
                             step: "sp1"
                           }
                           row_partition_dtype: %s
-                        }""" % row_partition_dtype,
-              },
-              tensor_input={
-                  "sp1":
-                      tf.RaggedTensor.from_row_splits(
-                          values=values,
-                          row_splits=np.asarray([0, 3, 4],
-                                                dtype=row_partition_numpy_type))
-              },
-              expected_record_batch={"sp1": expected_array}))
-  return result
+                        }"""
+                        % row_partition_dtype,
+                    },
+                    tensor_input={
+                        "sp1": tf.RaggedTensor.from_row_splits(
+                            values=values,
+                            row_splits=np.asarray(
+                                [0, 3, 4], dtype=row_partition_numpy_type
+                            ),
+                        )
+                    },
+                    expected_record_batch={"sp1": expected_array},
+                )
+            )
+    return result
 
 
 def _make_3d_sparse_tensor_test_cases():
-  result = []
-  for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
-    if tf_type == tf.string:
-      values = tf.constant([b"1", b"2", b"3", b"4"], dtype=tf.string)
-      expected_value_array = pa.array([[b"1", b"2", b"3"], [], [b"4"], []],
-                                      type=pa.large_list(arrow_type))
-    else:
-      values = tf.constant([1, 2, 3, 4], dtype=tf_type)
-      expected_value_array = pa.array([[1, 2, 3], [], [4], []],
-                                      type=pa.large_list(arrow_type))
-
-    result.append(
-        dict(
-            testcase_name="3d_sparse_tensor_%s" % tf_type.name,
-            type_specs={"sp1": tf.SparseTensorSpec([None, 4, 5], tf_type)},
-            expected_schema={
-                "sp1$values": pa.large_list(arrow_type),
-                "sp1$index0": pa.large_list(pa.int64()),
-                "sp1$index1": pa.large_list(pa.int64()),
-            },
-            expected_tensor_representations={
-                "sp1":
-                    """sparse_tensor {
+    result = []
+    for tf_type, arrow_type in _TF_TYPE_TO_ARROW_TYPE.items():
+        if tf_type == tf.string:
+            values = tf.constant([b"1", b"2", b"3", b"4"], dtype=tf.string)
+            expected_value_array = pa.array(
+                [[b"1", b"2", b"3"], [], [b"4"], []], type=pa.large_list(arrow_type)
+            )
+        else:
+            values = tf.constant([1, 2, 3, 4], dtype=tf_type)
+            expected_value_array = pa.array(
+                [[1, 2, 3], [], [4], []], type=pa.large_list(arrow_type)
+            )
+
+        result.append(
+            dict(
+                testcase_name="3d_sparse_tensor_%s" % tf_type.name,
+                type_specs={"sp1": tf.SparseTensorSpec([None, 4, 5], tf_type)},
+                expected_schema={
+                    "sp1$values": pa.large_list(arrow_type),
+                    "sp1$index0": pa.large_list(pa.int64()),
+                    "sp1$index1": pa.large_list(pa.int64()),
+                },
+                expected_tensor_representations={
+                    "sp1": """sparse_tensor {
                 dense_shape {
                   dim {
                     size: 4
@@ -237,144 +256,141 @@ def _make_3d_sparse_tensor_test_cases():
                 index_column_names: "sp1$index0"
                 index_column_names: "sp1$index1"
                 }""",
-            },
-            tensor_input={
-                "sp1":
-                    tf.SparseTensor(
+                },
+                tensor_input={
+                    "sp1": tf.SparseTensor(
                         values=values,
                         indices=[[0, 0, 0], [0, 2, 2], [0, 2, 4], [2, 3, 1]],
-                        dense_shape=[4, 4, 5]),
+                        dense_shape=[4, 4, 5],
+                    ),
+                },
+                expected_record_batch={
+                    "sp1$values": expected_value_array,
+                    "sp1$index0": pa.array(
+                        [[0, 2, 2], [], [3], []], type=pa.large_list(pa.int64())
+                    ),
+                    "sp1$index1": pa.array(
+                        [[0, 2, 4], [], [1], []], type=pa.large_list(pa.int64())
+                    ),
+                },
+            )
+        )
+    return result
+
+
+_CONVERT_TEST_CASES = (
+    [
+        dict(
+            testcase_name="multiple_tensors",
+            type_specs={
+                "sp1": tf.SparseTensorSpec([None, None], tf.int32),
+                "sp2": tf.SparseTensorSpec([None, None], tf.string),
             },
-            expected_record_batch={
-                "sp1$values":
-                    expected_value_array,
-                "sp1$index0":
-                    pa.array([[0, 2, 2], [], [3], []],
-                             type=pa.large_list(pa.int64())),
-                "sp1$index1":
-                    pa.array([[0, 2, 4], [], [1], []],
-                             type=pa.large_list(pa.int64())),
-            }))
-  return result
-
-
-_CONVERT_TEST_CASES = [
-    dict(
-        testcase_name="multiple_tensors",
-        type_specs={
-            "sp1": tf.SparseTensorSpec([None, None], tf.int32),
-            "sp2": tf.SparseTensorSpec([None, None], tf.string),
-        },
-        expected_schema={
-            "sp1": pa.large_list(pa.int32()),
-            "sp2": pa.large_list(pa.large_binary()),
-        },
-        expected_tensor_representations={
-            "sp1": """varlen_sparse_tensor { column_name: "sp1" }""",
-            "sp2": """varlen_sparse_tensor { column_name: "sp2" }""",
-        },
-        tensor_input={
-            "sp1":
-                tf.SparseTensor(
+            expected_schema={
+                "sp1": pa.large_list(pa.int32()),
+                "sp2": pa.large_list(pa.large_binary()),
+            },
+            expected_tensor_representations={
+                "sp1": """varlen_sparse_tensor { column_name: "sp1" }""",
+                "sp2": """varlen_sparse_tensor { column_name: "sp2" }""",
+            },
+            tensor_input={
+                "sp1": tf.SparseTensor(
                     values=tf.constant([1, 2], dtype=tf.int32),
                     indices=[[0, 0], [2, 0]],
-                    dense_shape=[4, 1]),
-            "sp2":
-                tf.SparseTensor(
-                    values=[b"aa", b"bb"],
-                    indices=[[2, 0], [2, 1]],
-                    dense_shape=[4, 2])
-        },
-        expected_record_batch={
-            "sp1":
-                pa.array([[1], [], [2], []], type=pa.large_list(pa.int32())),
-            "sp2":
-                pa.array([[], [], [b"aa", b"bb"], []],
-                         type=pa.large_list(pa.large_binary()))
-        }),
-    dict(
-        testcase_name="ragged_tensors",
-        type_specs={
-            "sp1":
-                tf.RaggedTensorSpec(
+                    dense_shape=[4, 1],
+                ),
+                "sp2": tf.SparseTensor(
+                    values=[b"aa", b"bb"], indices=[[2, 0], [2, 1]], dense_shape=[4, 2]
+                ),
+            },
+            expected_record_batch={
+                "sp1": pa.array([[1], [], [2], []], type=pa.large_list(pa.int32())),
+                "sp2": pa.array(
+                    [[], [], [b"aa", b"bb"], []], type=pa.large_list(pa.large_binary())
+                ),
+            },
+        ),
+        dict(
+            testcase_name="ragged_tensors",
+            type_specs={
+                "sp1": tf.RaggedTensorSpec(
                     tf.TensorShape([3, None]),
                     tf.int64,
                     ragged_rank=1,
-                    row_splits_dtype=tf.int64),
-            "sp2":
-                tf.RaggedTensorSpec(
+                    row_splits_dtype=tf.int64,
+                ),
+                "sp2": tf.RaggedTensorSpec(
                     tf.TensorShape([3, None]),
                     tf.string,
                     ragged_rank=1,
-                    row_splits_dtype=tf.int64),
-        },
-        expected_schema={
-            "sp1": pa.large_list(pa.int64()),
-            "sp2": pa.large_list(pa.large_binary()),
-        },
-        expected_tensor_representations={
-            "sp1":
-                """ragged_tensor {
+                    row_splits_dtype=tf.int64,
+                ),
+            },
+            expected_schema={
+                "sp1": pa.large_list(pa.int64()),
+                "sp2": pa.large_list(pa.large_binary()),
+            },
+            expected_tensor_representations={
+                "sp1": """ragged_tensor {
                         feature_path {
                           step: "sp1"
                         }
                         row_partition_dtype: INT64
                       }""",
-            "sp2":
-                """ragged_tensor {
+                "sp2": """ragged_tensor {
                         feature_path {
                           step: "sp2"
                         }
                         row_partition_dtype: INT64
                       }""",
-        },
-        tensor_input={
-            "sp1":
-                tf.RaggedTensor.from_row_splits(
+            },
+            tensor_input={
+                "sp1": tf.RaggedTensor.from_row_splits(
                     values=np.asarray([1, 5, 9], dtype=np.int64),
-                    row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64)),
-            "sp2":
-                tf.RaggedTensor.from_row_splits(
+                    row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64),
+                ),
+                "sp2": tf.RaggedTensor.from_row_splits(
                     values=np.asarray([b"x", b"y", b"z"], dtype=str),
-                    row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64))
-        },
-        expected_record_batch={
-            "sp1":
-                pa.array([[1, 5], [], [9]], type=pa.large_list(pa.int64())),
-            "sp2":
-                pa.array([[b"x", b"y"], [], [b"z"]],
-                         type=pa.large_list(pa.large_binary())),
-        }),
-    dict(
-        testcase_name="ragged_tensors_uniform_dims",
-        type_specs={
-            "rt1":
-                tf.RaggedTensorSpec(
+                    row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64),
+                ),
+            },
+            expected_record_batch={
+                "sp1": pa.array([[1, 5], [], [9]], type=pa.large_list(pa.int64())),
+                "sp2": pa.array(
+                    [[b"x", b"y"], [], [b"z"]], type=pa.large_list(pa.large_binary())
+                ),
+            },
+        ),
+        dict(
+            testcase_name="ragged_tensors_uniform_dims",
+            type_specs={
+                "rt1": tf.RaggedTensorSpec(
                     tf.TensorShape([3, None, 2]),
                     tf.int64,
                     ragged_rank=1,
-                    row_splits_dtype=tf.int64),
-            "rt2":
-                tf.RaggedTensorSpec(
+                    row_splits_dtype=tf.int64,
+                ),
+                "rt2": tf.RaggedTensorSpec(
                     tf.TensorShape([3, None, 2, 3]),
                     tf.string,
                     ragged_rank=1,
-                    row_splits_dtype=tf.int64),
-            "rt3":
-                tf.RaggedTensorSpec(
+                    row_splits_dtype=tf.int64,
+                ),
+                "rt3": tf.RaggedTensorSpec(
                     tf.TensorShape([3, None, None, 2]),
                     tf.float32,
                     ragged_rank=2,
-                    row_splits_dtype=tf.int64),
-        },
-        expected_schema={
-            "rt1": pa.large_list(pa.int64()),
-            "rt2": pa.large_list(pa.large_binary()),
-            "rt3": pa.large_list(pa.large_list(pa.float32())),
-        },
-        expected_tensor_representations={
-            "rt1":
-                """ragged_tensor {
+                    row_splits_dtype=tf.int64,
+                ),
+            },
+            expected_schema={
+                "rt1": pa.large_list(pa.int64()),
+                "rt2": pa.large_list(pa.large_binary()),
+                "rt3": pa.large_list(pa.large_list(pa.float32())),
+            },
+            expected_tensor_representations={
+                "rt1": """ragged_tensor {
                         feature_path {
                           step: "rt1"
                         }
@@ -383,8 +399,7 @@ def _make_3d_sparse_tensor_test_cases():
                           uniform_row_length: 2
                         }
                       }""",
-            "rt2":
-                """ragged_tensor {
+                "rt2": """ragged_tensor {
                         feature_path {
                           step: "rt2"
                         }
@@ -396,8 +411,7 @@ def _make_3d_sparse_tensor_test_cases():
                           uniform_row_length: 3
                         }
                       }""",
-            "rt3":
-                """ragged_tensor {
+                "rt3": """ragged_tensor {
                         feature_path {
                           step: "rt3"
                         }
@@ -406,105 +420,128 @@ def _make_3d_sparse_tensor_test_cases():
                           uniform_row_length: 2
                         }
                       }""",
-        },
-        tensor_input={
-            "rt1":
-                tf.RaggedTensor.from_row_splits(
+            },
+            tensor_input={
+                "rt1": tf.RaggedTensor.from_row_splits(
                     values=np.asarray([[1, 3], [5, 4], [9, 7]], dtype=np.int64),
-                    row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64)),
-            "rt2":
-                tf.RaggedTensor.from_row_splits(
+                    row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64),
+                ),
+                "rt2": tf.RaggedTensor.from_row_splits(
                     values=np.asarray(
-                        [[[b"x", b"a", b"a"], [b"x", b"c", b"a"]],
-                         [[b"y", b"a", b"a"], [b"y", b"c", b"a"]],
-                         [[b"z", b"a", b"a"], [b"z", b"c", b"a"]]],
-                        dtype=str),
-                    row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64)),
-            "rt3":
-                tf.RaggedTensor.from_row_splits(
+                        [
+                            [[b"x", b"a", b"a"], [b"x", b"c", b"a"]],
+                            [[b"y", b"a", b"a"], [b"y", b"c", b"a"]],
+                            [[b"z", b"a", b"a"], [b"z", b"c", b"a"]],
+                        ],
+                        dtype=str,
+                    ),
+                    row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64),
+                ),
+                "rt3": tf.RaggedTensor.from_row_splits(
                     values=tf.RaggedTensor.from_row_splits(
-                        values=np.asarray([[1.0, 3.0], [5.0, 4.0], [9.0, 7.0]],
-                                          dtype=np.float32),
-                        row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64)),
-                    row_splits=np.asarray([0, 0, 2, 3], dtype=np.int64)),
-        },
-        expected_record_batch={
-            "rt1":
-                pa.array([[1, 3, 5, 4], [], [9, 7]],
-                         type=pa.large_list(pa.int64())),
-            "rt2":
-                pa.array([[
-                    b"x", b"a", b"a", b"x", b"c", b"a", b"y", b"a", b"a", b"y",
-                    b"c", b"a"
-                ], [], [b"z", b"a", b"a", b"z", b"c", b"a"]],
-                         type=pa.large_list(pa.large_binary())),
-            "rt3":
-                pa.array([[], [[1.0, 3.0, 5.0, 4.0], []], [[9.0, 7.0]]],
-                         type=pa.large_list(pa.large_list(pa.float32()))),
-        }),
-    dict(
-        testcase_name="sparse_tensor_no_value",
-        type_specs={
-            "sp1": tf.SparseTensorSpec([None, None], tf.int32),
-        },
-        expected_schema={
-            "sp1": pa.large_list(pa.int32()),
-        },
-        expected_tensor_representations={
-            "sp1": """varlen_sparse_tensor { column_name: "sp1" }""",
-        },
-        tensor_input={
-            "sp1":
-                tf.SparseTensor(
+                        values=np.asarray(
+                            [[1.0, 3.0], [5.0, 4.0], [9.0, 7.0]], dtype=np.float32
+                        ),
+                        row_splits=np.asarray([0, 2, 2, 3], dtype=np.int64),
+                    ),
+                    row_splits=np.asarray([0, 0, 2, 3], dtype=np.int64),
+                ),
+            },
+            expected_record_batch={
+                "rt1": pa.array(
+                    [[1, 3, 5, 4], [], [9, 7]], type=pa.large_list(pa.int64())
+                ),
+                "rt2": pa.array(
+                    [
+                        [
+                            b"x",
+                            b"a",
+                            b"a",
+                            b"x",
+                            b"c",
+                            b"a",
+                            b"y",
+                            b"a",
+                            b"a",
+                            b"y",
+                            b"c",
+                            b"a",
+                        ],
+                        [],
+                        [b"z", b"a", b"a", b"z", b"c", b"a"],
+                    ],
+                    type=pa.large_list(pa.large_binary()),
+                ),
+                "rt3": pa.array(
+                    [[], [[1.0, 3.0, 5.0, 4.0], []], [[9.0, 7.0]]],
+                    type=pa.large_list(pa.large_list(pa.float32())),
+                ),
+            },
+        ),
+        dict(
+            testcase_name="sparse_tensor_no_value",
+            type_specs={
+                "sp1": tf.SparseTensorSpec([None, None], tf.int32),
+            },
+            expected_schema={
+                "sp1": pa.large_list(pa.int32()),
+            },
+            expected_tensor_representations={
+                "sp1": """varlen_sparse_tensor { column_name: "sp1" }""",
+            },
+            tensor_input={
+                "sp1": tf.SparseTensor(
                     values=tf.constant([], dtype=tf.int32),
                     indices=tf.constant([], shape=(0, 2), dtype=tf.int64),
-                    dense_shape=[2, 0]),
-        },
-        expected_record_batch={
-            "sp1": pa.array([[], []], type=pa.large_list(pa.int32())),
-        }),
-    dict(
-        testcase_name="1d_dense",
-        type_specs={
-            "dt1": tf.TensorSpec([None], tf.int32),
-        },
-        expected_schema={
-            "dt1": pa.large_list(pa.int32()),
-        },
-        expected_tensor_representations={
-            "dt1": """dense_tensor { column_name: "dt1" }""",
-        },
-        tensor_input={
-            "dt1": tf.constant([1, 2, 3], dtype=tf.int32),
-        },
-        expected_record_batch={
-            "dt1": pa.array([[1], [2], [3]], type=pa.large_list(pa.int32())),
-        }),
-    dict(
-        testcase_name="empty_2d_dense",
-        type_specs={"dt": tf.TensorSpec([None, 0], tf.float32)},
-        expected_schema={"dt": pa.large_list(pa.float32())},
-        expected_tensor_representations={
-            "dt":
-                """dense_tensor {
+                    dense_shape=[2, 0],
+                ),
+            },
+            expected_record_batch={
+                "sp1": pa.array([[], []], type=pa.large_list(pa.int32())),
+            },
+        ),
+        dict(
+            testcase_name="1d_dense",
+            type_specs={
+                "dt1": tf.TensorSpec([None], tf.int32),
+            },
+            expected_schema={
+                "dt1": pa.large_list(pa.int32()),
+            },
+            expected_tensor_representations={
+                "dt1": """dense_tensor { column_name: "dt1" }""",
+            },
+            tensor_input={
+                "dt1": tf.constant([1, 2, 3], dtype=tf.int32),
+            },
+            expected_record_batch={
+                "dt1": pa.array([[1], [2], [3]], type=pa.large_list(pa.int32())),
+            },
+        ),
+        dict(
+            testcase_name="empty_2d_dense",
+            type_specs={"dt": tf.TensorSpec([None, 0], tf.float32)},
+            expected_schema={"dt": pa.large_list(pa.float32())},
+            expected_tensor_representations={
+                "dt": """dense_tensor {
                          column_name: "dt"
                          shape { dim { size: 0} }
                        }""",
-        },
-        tensor_input={"dt": tf.constant(np.empty((2, 0), dtype=np.float32))},
-        expected_record_batch={
-            "dt": pa.array([[], []], type=pa.large_list(pa.float32()))
-        }),
-    dict(
-        testcase_name="generic_sparse_unknown_dense_shape",
-        type_specs={"sp": tf.SparseTensorSpec([None, None], tf.float32)},
-        expected_schema={
-            "sp$values": pa.large_list(pa.float32()),
-            "sp$index0": pa.large_list(pa.int64()),
-        },
-        expected_tensor_representations={
-            "sp":
-                """sparse_tensor {
+            },
+            tensor_input={"dt": tf.constant(np.empty((2, 0), dtype=np.float32))},
+            expected_record_batch={
+                "dt": pa.array([[], []], type=pa.large_list(pa.float32()))
+            },
+        ),
+        dict(
+            testcase_name="generic_sparse_unknown_dense_shape",
+            type_specs={"sp": tf.SparseTensorSpec([None, None], tf.float32)},
+            expected_schema={
+                "sp$values": pa.large_list(pa.float32()),
+                "sp$index0": pa.large_list(pa.int64()),
+            },
+            expected_tensor_representations={
+                "sp": """sparse_tensor {
                 dense_shape {
                   dim {
                     size: -1
@@ -513,35 +550,36 @@ def _make_3d_sparse_tensor_test_cases():
                 value_column_name: "sp$values"
                 index_column_names: "sp$index0"
                 }""",
-        },
-        tensor_input={
-            "sp":
-                tf.SparseTensor(
+            },
+            tensor_input={
+                "sp": tf.SparseTensor(
                     values=np.asarray([2, 3, 6, 7], dtype=np.float32),
                     indices=[[0, 0], [0, 2], [0, 4], [2, 1]],
-                    dense_shape=[4, -1]),
-        },
-        expected_record_batch={
-            "sp$values":
-                pa.array([[2, 3, 6], [], [7], []],
-                         type=pa.large_list(pa.float32())),
-            "sp$index0":
-                pa.array([[0, 2, 4], [], [1], []],
-                         type=pa.large_list(pa.int64())),
-        },
-        options=tensor_to_arrow.TensorsToRecordBatchConverter.Options(
-            generic_sparse_tensor_names=frozenset({"sp"}))),
-    dict(
-        testcase_name="generic_sparse_partially_known_dense_shape",
-        type_specs={"sp": tf.SparseTensorSpec([None, None, 10], tf.float32)},
-        expected_schema={
-            "sp$values": pa.large_list(pa.float32()),
-            "sp$index0": pa.large_list(pa.int64()),
-            "sp$index1": pa.large_list(pa.int64()),
-        },
-        expected_tensor_representations={
-            "sp":
-                """sparse_tensor {
+                    dense_shape=[4, -1],
+                ),
+            },
+            expected_record_batch={
+                "sp$values": pa.array(
+                    [[2, 3, 6], [], [7], []], type=pa.large_list(pa.float32())
+                ),
+                "sp$index0": pa.array(
+                    [[0, 2, 4], [], [1], []], type=pa.large_list(pa.int64())
+                ),
+            },
+            options=tensor_to_arrow.TensorsToRecordBatchConverter.Options(
+                generic_sparse_tensor_names=frozenset({"sp"})
+            ),
+        ),
+        dict(
+            testcase_name="generic_sparse_partially_known_dense_shape",
+            type_specs={"sp": tf.SparseTensorSpec([None, None, 10], tf.float32)},
+            expected_schema={
+                "sp$values": pa.large_list(pa.float32()),
+                "sp$index0": pa.large_list(pa.int64()),
+                "sp$index1": pa.large_list(pa.int64()),
+            },
+            expected_tensor_representations={
+                "sp": """sparse_tensor {
                 dense_shape {
                   dim {
                     size: -1
@@ -554,181 +592,204 @@ def _make_3d_sparse_tensor_test_cases():
                 index_column_names: "sp$index0"
                 index_column_names: "sp$index1"
                 }""",
-        },
-        tensor_input={
-            "sp":
-                tf.SparseTensor(
+            },
+            tensor_input={
+                "sp": tf.SparseTensor(
                     values=np.asarray([2, 3], dtype=np.float32),
                     indices=[[0, 0, 0], [1, 2, 7]],
-                    dense_shape=[2, -1, 10]),
-        },
-        expected_record_batch={
-            "sp$values":
-                pa.array([[2], [3]],
-                         type=pa.large_list(pa.float32())),
-            "sp$index0":
-                pa.array([[0], [2]],
-                         type=pa.large_list(pa.int64())),
-            "sp$index1":
-                pa.array([[0], [7]],
-                         type=pa.large_list(pa.int64())),
-        },
-        options=tensor_to_arrow.TensorsToRecordBatchConverter.Options(
-            generic_sparse_tensor_names=frozenset({"sp"})))
-] + _make_2d_varlen_sparse_tensor_test_cases(
-) + _make_2d_generic_sparse_tensor_test_cases(
-) + _make_3d_ragged_tensor_test_cases() + _make_2d_dense_tensor_test_cases(
-) + _make_3d_sparse_tensor_test_cases()
+                    dense_shape=[2, -1, 10],
+                ),
+            },
+            expected_record_batch={
+                "sp$values": pa.array([[2], [3]], type=pa.large_list(pa.float32())),
+                "sp$index0": pa.array([[0], [2]], type=pa.large_list(pa.int64())),
+                "sp$index1": pa.array([[0], [7]], type=pa.large_list(pa.int64())),
+            },
+            options=tensor_to_arrow.TensorsToRecordBatchConverter.Options(
+                generic_sparse_tensor_names=frozenset({"sp"})
+            ),
+        ),
+    ]
+    + _make_2d_varlen_sparse_tensor_test_cases()
+    + _make_2d_generic_sparse_tensor_test_cases()
+    + _make_3d_ragged_tensor_test_cases()
+    + _make_2d_dense_tensor_test_cases()
+    + _make_3d_sparse_tensor_test_cases()
+)
 
 
 class TensorToArrowTest(test_case.TfxBslTestCase):
-
-  @test_case.named_parameters(*_CONVERT_TEST_CASES)
-  def test_convert(
-      self,
-      type_specs,
-      expected_schema,
-      expected_tensor_representations,
-      tensor_input,
-      expected_record_batch,
-      options=tensor_to_arrow.TensorsToRecordBatchConverter.Options()):
-
-    def convert_and_check(tensors, test_values_conversion):
-      converter = tensor_to_arrow.TensorsToRecordBatchConverter(
-          type_specs, options)
-
-      self.assertEqual({f.name: f.type for f in converter.arrow_schema()},
-                       expected_schema,
-                       "actual: {}".format(converter.arrow_schema()))
-
-      canonical_expected_tensor_representations = {}
-      for n, r in expected_tensor_representations.items():
-        if not isinstance(r, schema_pb2.TensorRepresentation):
-          r = text_format.Parse(r, schema_pb2.TensorRepresentation())
-        canonical_expected_tensor_representations[n] = r
-
-      self.assertEqual(canonical_expected_tensor_representations,
-                       converter.tensor_representations())
-
-      rb = converter.convert(tensors)
-      self.assertLen(expected_record_batch, rb.num_columns)
-      for i, column in enumerate(rb):
-        expected = expected_record_batch[rb.schema[i].name]
-        self.assertTrue(
-            column.equals(expected),
-            "{}: actual: {}, expected: {}".format(rb.schema[i].name, column,
-                                                  expected))
-      # Test that TensorAdapter(TensorsToRecordBatchConverter()) is identity.
-      adapter = tensor_adapter.TensorAdapter(
-          tensor_adapter.TensorAdapterConfig(
-              arrow_schema=converter.arrow_schema(),
-              tensor_representations=converter.tensor_representations()))
-      adapter_output = adapter.ToBatchTensors(
-          rb, produce_eager_tensors=not test_values_conversion)
-      self.assertEqual(adapter_output.keys(), tensors.keys())
-      for k in adapter_output.keys():
-        if "value" not in k:
-          self.assertTensorsEqual(adapter_output[k], tensors[k])
-
-    def ragged_tensor_to_value(tensor):
-      if isinstance(tensor, tf.RaggedTensor):
-        values = tensor.values
-        return tf.compat.v1.ragged.RaggedTensorValue(
-            values=ragged_tensor_to_value(values),
-            row_splits=tensor.row_splits.numpy())
-      else:
-        return tensor.numpy()
-
-    def convert_eager_to_value(tensor):
-      if isinstance(tensor, tf.SparseTensor):
-        return tf.compat.v1.SparseTensorValue(tensor.indices, tensor.values,
-                                              tensor.dense_shape)
-      elif isinstance(tensor, tf.Tensor):
-        return tensor.numpy()
-      elif isinstance(tensor, tf.RaggedTensor):
-        return ragged_tensor_to_value(tensor)
-      else:
-        raise NotImplementedError(
-            "Only support converting SparseTensors, Tensors and RaggedTensors. "
-            "Got: {}".format(type(tensor)))
-
-    if tf.executing_eagerly():
-      values_input = {
-          k: convert_eager_to_value(v) for k, v in tensor_input.items()
-      }
-    else:
-      some_tensor = next(iter(tensor_input.values()))
-      graph = (
-          some_tensor.row_splits[0].graph
-          if isinstance(some_tensor, tf.RaggedTensor) else some_tensor.graph)
-      with tf.compat.v1.Session(graph=graph) as s:
-        values_input = s.run(tensor_input)
-    convert_and_check(values_input, test_values_conversion=True)
-
-  def test_relaxed_varlen_sparse_tensor(self):
-    # Demonstrates that TensorAdapter(TensorsToRecordBatchConverter()) is not
-    # an identity if the second dense dimension of SparseTensor is not tight.
-    type_specs = {"sp": tf.SparseTensorSpec([None, None], tf.int32)}
-    sp = tf.compat.v1.SparseTensorValue(
-        values=np.array([1, 2], np.int32),
-        indices=[[0, 0], [2, 0]],
-        dense_shape=[4, 2])
-    sp = tf.SparseTensor.from_value(sp)
-    converter = tensor_to_arrow.TensorsToRecordBatchConverter(type_specs)
-    rb = converter.convert({"sp": sp})
-    adapter = tensor_adapter.TensorAdapter(
-        tensor_adapter.TensorAdapterConfig(
-            arrow_schema=converter.arrow_schema(),
-            tensor_representations=converter.tensor_representations()))
-    adapter_output = adapter.ToBatchTensors(rb, produce_eager_tensors=True)
-    self.assertAllEqual(sp.values, adapter_output["sp"].values)
-    self.assertAllEqual(sp.indices, adapter_output["sp"].indices)
-    self.assertAllEqual(adapter_output["sp"].dense_shape, [4, 1])
-
-  def test_unable_to_handle(self):
-    with self.assertRaisesRegex(ValueError, "No handler found"):
-      tensor_to_arrow.TensorsToRecordBatchConverter(
-          {"sp": tf.SparseTensorSpec([None, None], tf.bool)})
-
-  def test_incompatible_type_spec(self):
-    converter = tensor_to_arrow.TensorsToRecordBatchConverter(
-        {"sp": tf.SparseTensorSpec([None, None], tf.int32)})
-    with self.assertRaisesRegex(TypeError, "Expected SparseTensorSpec"):
-      converter.convert({
-          "sp":
-              tf.SparseTensor(
-                  indices=[[0, 1]],
-                  values=tf.constant([0], dtype=tf.int64),
-                  dense_shape=[4, 1])
-      })
-
-  @test_case.named_parameters(*[
-      dict(
-          testcase_name="bool_value_type",
-          spec=tf.RaggedTensorSpec(
-              shape=[2, None, None],
-              dtype=tf.bool,
-              ragged_rank=2,
-              row_splits_dtype=tf.int64)),
-      dict(
-          testcase_name="ragged_outer_uniform_dim",
-          spec=tf.RaggedTensorSpec(
-              shape=[3, 2, None],
-              dtype=tf.int32,
-              ragged_rank=1,
-              row_splits_dtype=tf.int64)),
-      dict(
-          testcase_name="ragged_rank_less_than_one",
-          spec=tf.RaggedTensorSpec(
-              shape=[2],
-              dtype=tf.int32,
-              ragged_rank=0,
-              row_splits_dtype=tf.int64)),
-  ])
-  def test_unable_to_handle_ragged(self, spec):
-    with self.assertRaisesRegex(ValueError, "No handler found"):
-      tensor_to_arrow.TensorsToRecordBatchConverter({"rt": spec})
+    @test_case.named_parameters(*_CONVERT_TEST_CASES)
+    def test_convert(
+        self,
+        type_specs,
+        expected_schema,
+        expected_tensor_representations,
+        tensor_input,
+        expected_record_batch,
+        options=tensor_to_arrow.TensorsToRecordBatchConverter.Options(),
+    ):
+        def convert_and_check(tensors, test_values_conversion):
+            converter = tensor_to_arrow.TensorsToRecordBatchConverter(
+                type_specs, options
+            )
+
+            self.assertEqual(
+                {f.name: f.type for f in converter.arrow_schema()},
+                expected_schema,
+                f"actual: {converter.arrow_schema()}",
+            )
+
+            canonical_expected_tensor_representations = {}
+            for n, r in expected_tensor_representations.items():
+                if not isinstance(r, schema_pb2.TensorRepresentation):
+                    r = text_format.Parse(r, schema_pb2.TensorRepresentation())
+                canonical_expected_tensor_representations[n] = r
+
+            self.assertEqual(
+                canonical_expected_tensor_representations,
+                converter.tensor_representations(),
+            )
+
+            rb = converter.convert(tensors)
+            self.assertLen(expected_record_batch, rb.num_columns)
+            for i, column in enumerate(rb):
+                expected = expected_record_batch[rb.schema[i].name]
+                self.assertTrue(
+                    column.equals(expected),
+                    f"{rb.schema[i].name}: actual: {column}, expected: {expected}",
+                )
+            # Test that TensorAdapter(TensorsToRecordBatchConverter()) is identity.
+            adapter = tensor_adapter.TensorAdapter(
+                tensor_adapter.TensorAdapterConfig(
+                    arrow_schema=converter.arrow_schema(),
+                    tensor_representations=converter.tensor_representations(),
+                )
+            )
+            adapter_output = adapter.ToBatchTensors(
+                rb, produce_eager_tensors=not test_values_conversion
+            )
+            self.assertEqual(adapter_output.keys(), tensors.keys())
+            for k in adapter_output.keys():
+                if "value" not in k:
+                    self.assertTensorsEqual(adapter_output[k], tensors[k])
+
+        def ragged_tensor_to_value(tensor):
+            if isinstance(tensor, tf.RaggedTensor):
+                values = tensor.values
+                return tf.compat.v1.ragged.RaggedTensorValue(
+                    values=ragged_tensor_to_value(values),
+                    row_splits=tensor.row_splits.numpy(),
+                )
+            else:
+                return tensor.numpy()
+
+        def convert_eager_to_value(tensor):
+            if isinstance(tensor, tf.SparseTensor):
+                return tf.compat.v1.SparseTensorValue(
+                    tensor.indices, tensor.values, tensor.dense_shape
+                )
+            elif isinstance(tensor, tf.Tensor):
+                return tensor.numpy()
+            elif isinstance(tensor, tf.RaggedTensor):
+                return ragged_tensor_to_value(tensor)
+            else:
+                raise NotImplementedError(
+                    "Only support converting SparseTensors, Tensors and RaggedTensors. "
+                    f"Got: {type(tensor)}"
+                )
+
+        if tf.executing_eagerly():
+            values_input = {
+                k: convert_eager_to_value(v) for k, v in tensor_input.items()
+            }
+        else:
+            some_tensor = next(iter(tensor_input.values()))
+            graph = (
+                some_tensor.row_splits[0].graph
+                if isinstance(some_tensor, tf.RaggedTensor)
+                else some_tensor.graph
+            )
+            with tf.compat.v1.Session(graph=graph) as s:
+                values_input = s.run(tensor_input)
+        convert_and_check(values_input, test_values_conversion=True)
+
+    def test_relaxed_varlen_sparse_tensor(self):
+        # Demonstrates that TensorAdapter(TensorsToRecordBatchConverter()) is not
+        # an identity if the second dense dimension of SparseTensor is not tight.
+        type_specs = {"sp": tf.SparseTensorSpec([None, None], tf.int32)}
+        sp = tf.compat.v1.SparseTensorValue(
+            values=np.array([1, 2], np.int32),
+            indices=[[0, 0], [2, 0]],
+            dense_shape=[4, 2],
+        )
+        sp = tf.SparseTensor.from_value(sp)
+        converter = tensor_to_arrow.TensorsToRecordBatchConverter(type_specs)
+        rb = converter.convert({"sp": sp})
+        adapter = tensor_adapter.TensorAdapter(
+            tensor_adapter.TensorAdapterConfig(
+                arrow_schema=converter.arrow_schema(),
+                tensor_representations=converter.tensor_representations(),
+            )
+        )
+        adapter_output = adapter.ToBatchTensors(rb, produce_eager_tensors=True)
+        self.assertAllEqual(sp.values, adapter_output["sp"].values)
+        self.assertAllEqual(sp.indices, adapter_output["sp"].indices)
+        self.assertAllEqual(adapter_output["sp"].dense_shape, [4, 1])
+
+    def test_unable_to_handle(self):
+        with self.assertRaisesRegex(ValueError, "No handler found"):
+            tensor_to_arrow.TensorsToRecordBatchConverter(
+                {"sp": tf.SparseTensorSpec([None, None], tf.bool)}
+            )
+
+    def test_incompatible_type_spec(self):
+        converter = tensor_to_arrow.TensorsToRecordBatchConverter(
+            {"sp": tf.SparseTensorSpec([None, None], tf.int32)}
+        )
+        with self.assertRaisesRegex(TypeError, "Expected SparseTensorSpec"):
+            converter.convert(
+                {
+                    "sp": tf.SparseTensor(
+                        indices=[[0, 1]],
+                        values=tf.constant([0], dtype=tf.int64),
+                        dense_shape=[4, 1],
+                    )
+                }
+            )
+
+    @test_case.named_parameters(
+        *[
+            dict(
+                testcase_name="bool_value_type",
+                spec=tf.RaggedTensorSpec(
+                    shape=[2, None, None],
+                    dtype=tf.bool,
+                    ragged_rank=2,
+                    row_splits_dtype=tf.int64,
+                ),
+            ),
+            dict(
+                testcase_name="ragged_outer_uniform_dim",
+                spec=tf.RaggedTensorSpec(
+                    shape=[3, 2, None],
+                    dtype=tf.int32,
+                    ragged_rank=1,
+                    row_splits_dtype=tf.int64,
+                ),
+            ),
+            dict(
+                testcase_name="ragged_rank_less_than_one",
+                spec=tf.RaggedTensorSpec(
+                    shape=[2], dtype=tf.int32, ragged_rank=0, row_splits_dtype=tf.int64
+                ),
+            ),
+        ]
+    )
+    def test_unable_to_handle_ragged(self, spec):
+        with self.assertRaisesRegex(ValueError, "No handler found"):
+            tensor_to_arrow.TensorsToRecordBatchConverter({"rt": spec})
 
 
 if __name__ == "__main__":
-  test_case.main()
+    test_case.main()
diff --git a/tfx_bsl/tfxio/test_case.py b/tfx_bsl/tfxio/test_case.py
index 3cbb5c74..7a91f051 100644
--- a/tfx_bsl/tfxio/test_case.py
+++ b/tfx_bsl/tfxio/test_case.py
@@ -12,13 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Utilities for testing tfx_bsl.tfxio functionality."""
+
 from typing import Optional
 
-from absl.testing import parameterized
 import tensorflow as tf
-from tfx_bsl.types import common_types
+from absl.testing import absltest, parameterized
 
-from absl.testing import absltest
+from tfx_bsl.types import common_types
 
 named_parameters = parameterized.named_parameters
 SkipTest = absltest.SkipTest
@@ -27,21 +27,22 @@
 
 
 class TfxBslTestCase(parameterized.TestCase, tf.test.TestCase):
-  """Base test class for testing tfxio code."""
-
-  def assertTensorsEqual(self,
-                         left: common_types.TensorAlike,
-                         right: common_types.TensorAlike,
-                         msg: Optional[str] = None):
-    """Checks whether two tensors or tensor values are equal."""
-    self.assertIsInstance(left, type(right), msg)
-    if isinstance(left, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)):
-      self.assertAllEqual(left.values, right.values, msg)
-      self.assertAllEqual(left.indices, right.indices, msg)
-      self.assertAllEqual(left.dense_shape, right.dense_shape, msg)
-    elif isinstance(left,
-                    (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)):
-      self.assertTensorsEqual(left.values, right.values, msg)
-      self.assertAllEqual(left.row_splits, right.row_splits, msg)
-    else:
-      self.assertAllEqual(left, right, msg)
+    """Base test class for testing tfxio code."""
+
+    def assertTensorsEqual(
+        self,
+        left: common_types.TensorAlike,
+        right: common_types.TensorAlike,
+        msg: Optional[str] = None,
+    ):
+        """Checks whether two tensors or tensor values are equal."""
+        self.assertIsInstance(left, type(right), msg)
+        if isinstance(left, (tf.SparseTensor, tf.compat.v1.SparseTensorValue)):
+            self.assertAllEqual(left.values, right.values, msg)
+            self.assertAllEqual(left.indices, right.indices, msg)
+            self.assertAllEqual(left.dense_shape, right.dense_shape, msg)
+        elif isinstance(left, (tf.RaggedTensor, tf.compat.v1.ragged.RaggedTensorValue)):
+            self.assertTensorsEqual(left.values, right.values, msg)
+            self.assertAllEqual(left.row_splits, right.row_splits, msg)
+        else:
+            self.assertAllEqual(left, right, msg)
diff --git a/tfx_bsl/tfxio/test_util.py b/tfx_bsl/tfxio/test_util.py
index 3c406899..067d7ccd 100644
--- a/tfx_bsl/tfxio/test_util.py
+++ b/tfx_bsl/tfxio/test_util.py
@@ -13,21 +13,24 @@
 # limitations under the License.
 """Contains TFXIO helpers for testing purposes."""
 
-from typing import  Optional, Text
-
-from tfx_bsl.tfxio import tf_example_record
+from typing import Optional, Text
 
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.tfxio import tf_example_record
+
 
 # DEPRECATED. Prefer tf_example_record.TFExampleBeamRecord.
 # TODO(b/158580478): clean this up.
 class InMemoryTFExampleRecord(tf_example_record.TFExampleBeamRecord):
-
-  def __init__(self, schema: Optional[schema_pb2.Schema] = None,
-               raw_record_column_name: Optional[Text] = None):
-    super().__init__(
-        physical_format="inmem",
-        telemetry_descriptors=["test", "component"],
-        schema=schema,
-        raw_record_column_name=raw_record_column_name)
+    def __init__(
+        self,
+        schema: Optional[schema_pb2.Schema] = None,
+        raw_record_column_name: Optional[str] = None,
+    ):
+        super().__init__(
+            physical_format="inmem",
+            telemetry_descriptors=["test", "component"],
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+        )
diff --git a/tfx_bsl/tfxio/test_util_test.py b/tfx_bsl/tfxio/test_util_test.py
index 083d2916..c5caf49d 100644
--- a/tfx_bsl/tfxio/test_util_test.py
+++ b/tfx_bsl/tfxio/test_util_test.py
@@ -13,23 +13,24 @@
 # limitations under the License.
 """Tests for tfx_bsl.tfxio.test_util."""
 
-import pytest
 import apache_beam as beam
-from apache_beam.testing import util as beam_testing_util
 import pyarrow as pa
+import pytest
 import tensorflow as tf
-from tfx_bsl.tfxio import test_util
-
-from google.protobuf import text_format
 from absl.testing import absltest
+from apache_beam.testing import util as beam_testing_util
+from google.protobuf import text_format
+
+from tfx_bsl.tfxio import test_util
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class TestUtilTest(absltest.TestCase):
-
-  def testGetRecordBatches(self):
-    tfxio = test_util.InMemoryTFExampleRecord()
-    examples = [text_format.Parse("""
+    def testGetRecordBatches(self):
+        tfxio = test_util.InMemoryTFExampleRecord()
+        examples = [
+            text_format.Parse(
+                """
     features {
       feature {
         key: "f1"
@@ -39,19 +40,26 @@ def testGetRecordBatches(self):
           }
         }
       }
-    }""", tf.train.Example()).SerializeToString()]
-    def _AssertFn(record_batches):
-      self.assertLen(record_batches, 1)
-      record_batch = record_batches[0]
-      self.assertEqual(record_batch.num_rows, 1)
-      self.assertEqual(record_batch.num_columns, 1)
-      self.assertTrue(record_batch[0].equals(
-          pa.array([[123]], type=pa.large_list(pa.int64()))))
+    }""",
+                tf.train.Example(),
+            ).SerializeToString()
+        ]
+
+        def _AssertFn(record_batches):
+            self.assertLen(record_batches, 1)
+            record_batch = record_batches[0]
+            self.assertEqual(record_batch.num_rows, 1)
+            self.assertEqual(record_batch.num_columns, 1)
+            self.assertTrue(
+                record_batch[0].equals(
+                    pa.array([[123]], type=pa.large_list(pa.int64()))
+                )
+            )
 
-    with beam.Pipeline() as p:
-      record_batches = p | beam.Create(examples) | tfxio.BeamSource()
-      beam_testing_util.assert_that(record_batches, _AssertFn)
+        with beam.Pipeline() as p:
+            record_batches = p | beam.Create(examples) | tfxio.BeamSource()
+            beam_testing_util.assert_that(record_batches, _AssertFn)
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tfxio/tf_example_record.py b/tfx_bsl/tfxio/tf_example_record.py
index e0859916..04bfbbfc 100644
--- a/tfx_bsl/tfxio/tf_example_record.py
+++ b/tfx_bsl/tfxio/tf_example_record.py
@@ -19,392 +19,445 @@
 import apache_beam as beam
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.coders import batch_util
-from tfx_bsl.coders import example_coder
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import dataset_util
-from tfx_bsl.tfxio import record_based_tfxio
-from tfx_bsl.tfxio import tensor_adapter
-from tfx_bsl.tfxio import tensor_representation_util
-from tfx_bsl.tfxio import tfxio
-
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.coders import batch_util, example_coder
+from tfx_bsl.tfxio import (
+    dataset_options,
+    dataset_util,
+    record_based_tfxio,
+    tensor_adapter,
+    tensor_representation_util,
+    tfxio,
+)
+
 _FEATURE_NAME_PREFIX = "_tfx_bsl_"
 
 
 class _TFExampleRecordBase(record_based_tfxio.RecordBasedTFXIO):
-  """Base class for TFXIO implementations for record based tf.Examples."""
-
-  def __init__(self,
-               schema: Optional[schema_pb2.Schema] = None,
-               raw_record_column_name: Optional[str] = None,
-               telemetry_descriptors: Optional[List[str]] = None,
-               physical_format: Optional[str] = None,
-               schema_for_decoding: Optional[schema_pb2.Schema] = None):
-    # TODO(b/154648951): make telemetry_descriptors and physical_format required
-    # arguments, when TFT's compatibility TFXIO starts setting them.
-    if physical_format is None:
-      physical_format = "unknown"
-    super().__init__(
-        telemetry_descriptors=telemetry_descriptors,
-        raw_record_column_name=raw_record_column_name,
-        logical_format="tf_example",
-        physical_format=physical_format)
-    self._schema = schema
-
-    if schema_for_decoding is not None:
-      assert schema is not None
-    self._schema_for_decoding = schema_for_decoding
-
-  def SupportAttachingRawRecords(self) -> bool:
-    return True
-
-  @abc.abstractmethod
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    """Returns a PTransform that produces PCollection[bytes]."""
-
-  def _RawRecordToRecordBatchInternal(self,
-                                      batch_size: Optional[int] = None
-                                     ) -> beam.PTransform:
-
-    @beam.typehints.with_input_types(bytes)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def ptransform_fn(raw_records_pcoll: beam.pvalue.PCollection):
-      return (
-          raw_records_pcoll
-          | "Batch"
-          >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
-          | "Decode"
-          >> beam.ParDo(
-              _DecodeBatchExamplesDoFn(
-                  self._GetSchemaForDecoding(), self.raw_record_column_name
-              )
-          )
-      )
-
-    return beam.ptransform_fn(ptransform_fn)()
-
-  def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
-    schema = self._GetSchemaForDecoding()
-    if schema is None:
-      raise ValueError("TFMD schema not provided. Unable to derive an "
-                       "Arrow schema")
-    return example_coder.ExamplesToRecordBatchDecoder(
-        schema.SerializeToString()).ArrowSchema()
-
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    return tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
-        self._schema)
-
-  def _GetSchemaForDecoding(self) -> Optional[schema_pb2.Schema]:
-    return (self._schema
-            if self._schema_for_decoding is None else self._schema_for_decoding)
-
-  def _GetTfExampleParserConfig(self) -> Tuple[Dict[str, Any], Dict[str, str]]:
-    """Creates a dict feature spec that can be used in tf.io.parse_example().
-
-    To reduce confusion: 'tensor name' are the keys of TensorRepresentations.
-    'feature name' are the keys to the tf.Example parser config.
-    'column name' are the features in the schema.
-
-    Returns:
-      Two maps. The first is the parser config that maps from feature
-      name to a tf.io Feature. The second is a mapping from feature names to
-      tensor names.
-
-    Raises:
-      ValueError: if the tf.Example parser config is invalid.
-    """
-    if self._schema is None:
-      raise ValueError(
-          "Unable to create a parsing config because no schema is provided.")
-
-    column_name_to_type = {f.name: f.type for f in self._schema.feature}
-    features = {}
-    feature_name_to_tensor_name = {}
-    for tensor_name, tensor_rep in self.TensorRepresentations().items():
-      paths = (
-          tensor_representation_util.GetSourceColumnsFromTensorRepresentation(
-              tensor_rep
-          )
-      )
-      if len(paths) == 1:
-        # The parser config refers to a single tf.Example feature. In this case,
-        # the key to the parser config needs to be the name of the feature.
-        column_name = paths[0].initial_step()
-        value_type = column_name_to_type[column_name]
-      else:
-        # The parser config needs to refer to multiple tf.Example features. In
-        # this case the key to the parser config does not matter. We preserve
-        # the tensor representation key.
-        column_name = tensor_name
-        value_type = column_name_to_type[
-            tensor_representation_util
-            .GetSourceValueColumnFromTensorRepresentation(
-                tensor_rep).initial_step()]
-      parse_config = tensor_representation_util.CreateTfExampleParserConfig(
-          tensor_rep, value_type)
-
-      if _is_multi_column_parser_config(parse_config):
-        # Create internal naming, to prevent possible naming collisions between
-        # tensor_name and column_name.
-        feature_name = _FEATURE_NAME_PREFIX + tensor_name + "_" + column_name
-      else:
-        feature_name = column_name
-      if feature_name in feature_name_to_tensor_name:
-        clashing_tensor_rep = self.TensorRepresentations()[
-            feature_name_to_tensor_name[feature_name]]
-        raise ValueError(f"Unable to create a valid parsing config. Feature "
-                         f"name: {feature_name} is a duplicate of "
-                         f"tensor representation: {clashing_tensor_rep}")
-      feature_name_to_tensor_name[feature_name] = tensor_name
-      features[feature_name] = parse_config
-
-    _validate_tf_example_parser_config(features, self._schema)
-
-    return features, feature_name_to_tensor_name
-
-  def _RenameFeatures(
-      self, feature_dict: Dict[str, Any],
-      feature_name_to_tensor_name: Dict[str, str]) -> Dict[str, Any]:
-    """Renames the feature keys to use the tensor representation keys."""
-    renamed_feature_dict = {}
-    for feature_name, tensor in feature_dict.items():
-      renamed_feature_dict[
-          feature_name_to_tensor_name[feature_name]] = tensor
-
-    return renamed_feature_dict
+    """Base class for TFXIO implementations for record based tf.Examples."""
+
+    def __init__(
+        self,
+        schema: Optional[schema_pb2.Schema] = None,
+        raw_record_column_name: Optional[str] = None,
+        telemetry_descriptors: Optional[List[str]] = None,
+        physical_format: Optional[str] = None,
+        schema_for_decoding: Optional[schema_pb2.Schema] = None,
+    ):
+        # TODO(b/154648951): make telemetry_descriptors and physical_format required
+        # arguments, when TFT's compatibility TFXIO starts setting them.
+        if physical_format is None:
+            physical_format = "unknown"
+        super().__init__(
+            telemetry_descriptors=telemetry_descriptors,
+            raw_record_column_name=raw_record_column_name,
+            logical_format="tf_example",
+            physical_format=physical_format,
+        )
+        self._schema = schema
+
+        if schema_for_decoding is not None:
+            assert schema is not None
+        self._schema_for_decoding = schema_for_decoding
+
+    def SupportAttachingRawRecords(self) -> bool:
+        return True
+
+    @abc.abstractmethod
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        """Returns a PTransform that produces PCollection[bytes]."""
+
+    def _RawRecordToRecordBatchInternal(
+        self, batch_size: Optional[int] = None
+    ) -> beam.PTransform:
+        @beam.typehints.with_input_types(bytes)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def ptransform_fn(raw_records_pcoll: beam.pvalue.PCollection):
+            return (
+                raw_records_pcoll
+                | "Batch"
+                >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
+                | "Decode"
+                >> beam.ParDo(
+                    _DecodeBatchExamplesDoFn(
+                        self._GetSchemaForDecoding(), self.raw_record_column_name
+                    )
+                )
+            )
+
+        return beam.ptransform_fn(ptransform_fn)()
+
+    def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
+        schema = self._GetSchemaForDecoding()
+        if schema is None:
+            raise ValueError(
+                "TFMD schema not provided. Unable to derive an " "Arrow schema"
+            )
+        return example_coder.ExamplesToRecordBatchDecoder(
+            schema.SerializeToString()
+        ).ArrowSchema()
+
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        return tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
+            self._schema
+        )
+
+    def _GetSchemaForDecoding(self) -> Optional[schema_pb2.Schema]:
+        return (
+            self._schema
+            if self._schema_for_decoding is None
+            else self._schema_for_decoding
+        )
+
+    def _GetTfExampleParserConfig(self) -> Tuple[Dict[str, Any], Dict[str, str]]:
+        """Creates a dict feature spec that can be used in tf.io.parse_example().
+
+        To reduce confusion: 'tensor name' are the keys of TensorRepresentations.
+        'feature name' are the keys to the tf.Example parser config.
+        'column name' are the features in the schema.
+
+        Returns
+        -------
+          Two maps. The first is the parser config that maps from feature
+          name to a tf.io Feature. The second is a mapping from feature names to
+          tensor names.
+
+        Raises
+        ------
+          ValueError: if the tf.Example parser config is invalid.
+        """
+        if self._schema is None:
+            raise ValueError(
+                "Unable to create a parsing config because no schema is provided."
+            )
+
+        column_name_to_type = {f.name: f.type for f in self._schema.feature}
+        features = {}
+        feature_name_to_tensor_name = {}
+        for tensor_name, tensor_rep in self.TensorRepresentations().items():
+            paths = tensor_representation_util.GetSourceColumnsFromTensorRepresentation(
+                tensor_rep
+            )
+            if len(paths) == 1:
+                # The parser config refers to a single tf.Example feature. In this case,
+                # the key to the parser config needs to be the name of the feature.
+                column_name = paths[0].initial_step()
+                value_type = column_name_to_type[column_name]
+            else:
+                # The parser config needs to refer to multiple tf.Example features. In
+                # this case the key to the parser config does not matter. We preserve
+                # the tensor representation key.
+                column_name = tensor_name
+                value_type = column_name_to_type[
+                    tensor_representation_util.GetSourceValueColumnFromTensorRepresentation(
+                        tensor_rep
+                    ).initial_step()
+                ]
+            parse_config = tensor_representation_util.CreateTfExampleParserConfig(
+                tensor_rep, value_type
+            )
+
+            if _is_multi_column_parser_config(parse_config):
+                # Create internal naming, to prevent possible naming collisions between
+                # tensor_name and column_name.
+                feature_name = _FEATURE_NAME_PREFIX + tensor_name + "_" + column_name
+            else:
+                feature_name = column_name
+            if feature_name in feature_name_to_tensor_name:
+                clashing_tensor_rep = self.TensorRepresentations()[
+                    feature_name_to_tensor_name[feature_name]
+                ]
+                raise ValueError(
+                    f"Unable to create a valid parsing config. Feature "
+                    f"name: {feature_name} is a duplicate of "
+                    f"tensor representation: {clashing_tensor_rep}"
+                )
+            feature_name_to_tensor_name[feature_name] = tensor_name
+            features[feature_name] = parse_config
+
+        _validate_tf_example_parser_config(features, self._schema)
+
+        return features, feature_name_to_tensor_name
+
+    def _RenameFeatures(
+        self, feature_dict: Dict[str, Any], feature_name_to_tensor_name: Dict[str, str]
+    ) -> Dict[str, Any]:
+        """Renames the feature keys to use the tensor representation keys."""
+        renamed_feature_dict = {}
+        for feature_name, tensor in feature_dict.items():
+            renamed_feature_dict[feature_name_to_tensor_name[feature_name]] = tensor
+
+        return renamed_feature_dict
 
 
 class TFExampleBeamRecord(_TFExampleRecordBase):
-  """TFXIO implementation for serialized tf.Examples in pcoll[bytes].
-
-  This is a special TFXIO that does not actually do I/O -- it relies on the
-  caller to prepare a PCollection of bytes (serialized tf.Examples).
-  """
-
-  def __init__(self,
-               physical_format: str,
-               telemetry_descriptors: Optional[List[str]] = None,
-               schema: Optional[schema_pb2.Schema] = None,
-               raw_record_column_name: Optional[str] = None):
-    """Initializer.
-
-    Args:
-      physical_format: The physical format that describes where the input
-        pcoll[bytes] comes from. Used for telemetry purposes. Examples: "text",
-        "tfrecord".
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-      schema: A TFMD Schema describing the dataset.
-      raw_record_column_name: If not None, the generated Arrow RecordBatches
-        will contain a column of the given name that contains serialized
-        records.
+    """TFXIO implementation for serialized tf.Examples in pcoll[bytes].
+
+    This is a special TFXIO that does not actually do I/O -- it relies on the
+    caller to prepare a PCollection of bytes (serialized tf.Examples).
     """
-    super().__init__(
-        schema=schema, raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=telemetry_descriptors,
-        physical_format=physical_format)
 
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return (beam.ptransform_fn(lambda x: x)()
+    def __init__(
+        self,
+        physical_format: str,
+        telemetry_descriptors: Optional[List[str]] = None,
+        schema: Optional[schema_pb2.Schema] = None,
+        raw_record_column_name: Optional[str] = None,
+    ):
+        """Initializer.
+
+        Args:
+        ----
+          physical_format: The physical format that describes where the input
+            pcoll[bytes] comes from. Used for telemetry purposes. Examples: "text",
+            "tfrecord".
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+          schema: A TFMD Schema describing the dataset.
+          raw_record_column_name: If not None, the generated Arrow RecordBatches
+            will contain a column of the given name that contains serialized
+            records.
+        """
+        super().__init__(
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=telemetry_descriptors,
+            physical_format=physical_format,
+        )
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return (
+            beam.ptransform_fn(lambda x: x)()
             .with_input_types(bytes)
-            .with_output_types(bytes))
-
-  def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
-    # Note: We only copy projected features into the new schema because the
-    # coder, and ArrowSchema() only care about Schema.feature. If they start
-    # depending on other Schema fields then those fields must also be projected.
-    projected_schema = (
-        tensor_representation_util.ProjectTensorRepresentationsInSchema(
-            self._schema, tensor_names))
-    return TFExampleBeamRecord(self._physical_format,
-                               self.telemetry_descriptors, projected_schema,
-                               self.raw_record_column_name)
-
-  def TensorFlowDataset(self,
-                        options: dataset_options.TensorFlowDatasetOptions):
-    raise NotImplementedError(
-        "TFExampleBeamRecord is unable to provide a TensorFlowDataset "
-        "because it does not do I/O")
+            .with_output_types(bytes)
+        )
+
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        # Note: We only copy projected features into the new schema because the
+        # coder, and ArrowSchema() only care about Schema.feature. If they start
+        # depending on other Schema fields then those fields must also be projected.
+        projected_schema = (
+            tensor_representation_util.ProjectTensorRepresentationsInSchema(
+                self._schema, tensor_names
+            )
+        )
+        return TFExampleBeamRecord(
+            self._physical_format,
+            self.telemetry_descriptors,
+            projected_schema,
+            self.raw_record_column_name,
+        )
+
+    def TensorFlowDataset(self, options: dataset_options.TensorFlowDatasetOptions):
+        raise NotImplementedError(
+            "TFExampleBeamRecord is unable to provide a TensorFlowDataset "
+            "because it does not do I/O"
+        )
 
 
 class TFExampleRecord(_TFExampleRecordBase):
-  """TFXIO implementation for tf.Example on TFRecord."""
-
-  def __init__(self,
-               file_pattern: Union[List[str], str],
-               validate: bool = True,
-               schema: Optional[schema_pb2.Schema] = None,
-               raw_record_column_name: Optional[str] = None,
-               telemetry_descriptors: Optional[List[str]] = None):
-    """Initializes a TFExampleRecord TFXIO.
-
-    Args:
-      file_pattern: A file glob pattern to read TFRecords from.
-      validate: Not used. do not set. (not used since post 0.22.1).
-      schema: A TFMD Schema describing the dataset.
-      raw_record_column_name: If not None, the generated Arrow RecordBatches
-        will contain a column of the given name that contains serialized
-        records.
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-    """
-    super().__init__(
-        schema=schema, raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=telemetry_descriptors,
-        physical_format="tfrecords_gzip")
-    del validate
-    if not isinstance(file_pattern, list):
-      file_pattern = [file_pattern]
-    assert file_pattern, "Must provide at least one file pattern."
-    self._file_pattern = file_pattern
-
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return record_based_tfxio.ReadTfRecord(self._file_pattern)
-
-  def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
-    # Note: We only copy projected features into the new schema because the
-    # coder, and ArrowSchema() only care about Schema.feature. If they start
-    # depending on other Schema fields then those fields must also be projected.
-    projected_schema = (
-        tensor_representation_util.ProjectTensorRepresentationsInSchema(
-            self._schema, tensor_names))
-    return TFExampleRecord(
-        file_pattern=self._file_pattern,
-        schema=projected_schema,
-        raw_record_column_name=self.raw_record_column_name,
-        telemetry_descriptors=self.telemetry_descriptors)
-
-  def RecordBatches(
-      self, options: dataset_options.RecordBatchesOptions
-  ) -> Iterator[pa.RecordBatch]:
-    dataset = dataset_util.make_tf_record_dataset(
-        self._file_pattern, options.batch_size, options.drop_final_batch,
-        options.num_epochs, options.shuffle, options.shuffle_buffer_size,
-        options.shuffle_seed)
-
-    decoder = example_coder.ExamplesToRecordBatchDecoder(
-        self._schema.SerializeToString() if self._schema is not None else None
-    )
-    for examples in dataset.as_numpy_iterator():
-      decoded = decoder.DecodeBatch(examples)
-      if self._raw_record_column_name is None:
-        yield decoded
-      else:
-        yield record_based_tfxio.AppendRawRecordColumn(
-            decoded, self._raw_record_column_name, examples.tolist())
-
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    """Creates a TFRecordDataset that yields Tensors.
-
-    The serialized tf.Examples are parsed by `tf.io.parse_example` to create
-    Tensors.
-
-    See base class (tfxio.TFXIO) for more details.
-
-    Args:
-      options: an options object for the tf.data.Dataset. See
-        `dataset_options.TensorFlowDatasetOptions` for more details.
-
-    Returns:
-      A dataset of `dict` elements, (or a tuple of `dict` elements and label).
-      Each `dict` maps feature keys to `Tensor`, `SparseTensor`, or
-      `RaggedTensor` objects.
-
-    Raises:
-      ValueError: if there is something wrong with the tensor_representation.
-    """
-    (tf_example_parser_config,
-     feature_name_to_tensor_name) = self._GetTfExampleParserConfig()
-
-    file_pattern = tf.convert_to_tensor(self._file_pattern)
-    dataset = dataset_util.make_tf_record_dataset(
-        file_pattern,
-        batch_size=options.batch_size,
-        num_epochs=options.num_epochs,
-        shuffle=options.shuffle,
-        shuffle_buffer_size=options.shuffle_buffer_size,
-        shuffle_seed=options.shuffle_seed,
-        reader_num_threads=options.reader_num_threads,
-        drop_final_batch=options.drop_final_batch)
-
-    # Parse `Example` tensors to a dictionary of `Feature` tensors.
-    dataset = dataset.apply(
-        tf.data.experimental.parse_example_dataset(tf_example_parser_config))
-
-    dataset = dataset.map(
-        lambda x: self._RenameFeatures(x, feature_name_to_tensor_name))
-
-    label_key = options.label_key
-    if label_key is not None:
-      dataset = self._PopLabelFeatureFromDataset(dataset, label_key)
-
-    return dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
+    """TFXIO implementation for tf.Example on TFRecord."""
+
+    def __init__(
+        self,
+        file_pattern: Union[List[str], str],
+        validate: bool = True,
+        schema: Optional[schema_pb2.Schema] = None,
+        raw_record_column_name: Optional[str] = None,
+        telemetry_descriptors: Optional[List[str]] = None,
+    ):
+        """Initializes a TFExampleRecord TFXIO.
+
+        Args:
+        ----
+          file_pattern: A file glob pattern to read TFRecords from.
+          validate: Not used. do not set. (not used since post 0.22.1).
+          schema: A TFMD Schema describing the dataset.
+          raw_record_column_name: If not None, the generated Arrow RecordBatches
+            will contain a column of the given name that contains serialized
+            records.
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+        """
+        super().__init__(
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=telemetry_descriptors,
+            physical_format="tfrecords_gzip",
+        )
+        del validate
+        if not isinstance(file_pattern, list):
+            file_pattern = [file_pattern]
+        assert file_pattern, "Must provide at least one file pattern."
+        self._file_pattern = file_pattern
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return record_based_tfxio.ReadTfRecord(self._file_pattern)
+
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        # Note: We only copy projected features into the new schema because the
+        # coder, and ArrowSchema() only care about Schema.feature. If they start
+        # depending on other Schema fields then those fields must also be projected.
+        projected_schema = (
+            tensor_representation_util.ProjectTensorRepresentationsInSchema(
+                self._schema, tensor_names
+            )
+        )
+        return TFExampleRecord(
+            file_pattern=self._file_pattern,
+            schema=projected_schema,
+            raw_record_column_name=self.raw_record_column_name,
+            telemetry_descriptors=self.telemetry_descriptors,
+        )
+
+    def RecordBatches(
+        self, options: dataset_options.RecordBatchesOptions
+    ) -> Iterator[pa.RecordBatch]:
+        dataset = dataset_util.make_tf_record_dataset(
+            self._file_pattern,
+            options.batch_size,
+            options.drop_final_batch,
+            options.num_epochs,
+            options.shuffle,
+            options.shuffle_buffer_size,
+            options.shuffle_seed,
+        )
+
+        decoder = example_coder.ExamplesToRecordBatchDecoder(
+            self._schema.SerializeToString() if self._schema is not None else None
+        )
+        for examples in dataset.as_numpy_iterator():
+            decoded = decoder.DecodeBatch(examples)
+            if self._raw_record_column_name is None:
+                yield decoded
+            else:
+                yield record_based_tfxio.AppendRawRecordColumn(
+                    decoded, self._raw_record_column_name, examples.tolist()
+                )
+
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        """Creates a TFRecordDataset that yields Tensors.
+
+        The serialized tf.Examples are parsed by `tf.io.parse_example` to create
+        Tensors.
+
+        See base class (tfxio.TFXIO) for more details.
+
+        Args:
+        ----
+          options: an options object for the tf.data.Dataset. See
+            `dataset_options.TensorFlowDatasetOptions` for more details.
+
+        Returns:
+        -------
+          A dataset of `dict` elements, (or a tuple of `dict` elements and label).
+          Each `dict` maps feature keys to `Tensor`, `SparseTensor`, or
+          `RaggedTensor` objects.
+
+        Raises:
+        ------
+          ValueError: if there is something wrong with the tensor_representation.
+        """
+        (tf_example_parser_config, feature_name_to_tensor_name) = (
+            self._GetTfExampleParserConfig()
+        )
+
+        file_pattern = tf.convert_to_tensor(self._file_pattern)
+        dataset = dataset_util.make_tf_record_dataset(
+            file_pattern,
+            batch_size=options.batch_size,
+            num_epochs=options.num_epochs,
+            shuffle=options.shuffle,
+            shuffle_buffer_size=options.shuffle_buffer_size,
+            shuffle_seed=options.shuffle_seed,
+            reader_num_threads=options.reader_num_threads,
+            drop_final_batch=options.drop_final_batch,
+        )
+
+        # Parse `Example` tensors to a dictionary of `Feature` tensors.
+        dataset = dataset.apply(
+            tf.data.experimental.parse_example_dataset(tf_example_parser_config)
+        )
+
+        dataset = dataset.map(
+            lambda x: self._RenameFeatures(x, feature_name_to_tensor_name)
+        )
+
+        label_key = options.label_key
+        if label_key is not None:
+            dataset = self._PopLabelFeatureFromDataset(dataset, label_key)
+
+        return dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
 
 
 @beam.typehints.with_input_types(List[bytes])
 @beam.typehints.with_output_types(pa.RecordBatch)
 class _DecodeBatchExamplesDoFn(beam.DoFn):
-  """Batches serialized protos bytes and decode them into an Arrow table."""
-
-  def __init__(self, schema: Optional[schema_pb2.Schema],
-               raw_record_column_name: Optional[str]):
-    """Initializer."""
-    self._serialized_schema = None
-    if schema is not None:
-      # Serialize to avoid storing TFMD protos. See b/167128119 for the reason.
-      self._serialized_schema = schema.SerializeToString()
-    self._raw_record_column_name = raw_record_column_name
-    self._decoder = None
-
-  def setup(self):
-    if self._serialized_schema:
-      self._decoder = example_coder.ExamplesToRecordBatchDecoder(
-          self._serialized_schema)
-    else:
-      self._decoder = example_coder.ExamplesToRecordBatchDecoder()
-
-  def process(self, examples: List[bytes]):
-    if not self._decoder:
-      raise ValueError("Decoder uninitialized. Run setup() first.")
-    decoded = self._decoder.DecodeBatch(examples)
-    if self._raw_record_column_name is None:
-      yield decoded
-    else:
-      yield record_based_tfxio.AppendRawRecordColumn(
-          decoded, self._raw_record_column_name, examples)
-
-
-def _validate_tf_example_parser_config(config: Dict[str, Any],
-                                       schema: schema_pb2.Schema) -> None:
-  """Validate a tf_example_parse_config by tracing parse_example."""
-
-  # TODO(b/173738031): We would have used a tf.io.validate_parsing_config() if
-  # it existed.
-  @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
-  def parse(i):
-    tf.io.parse_example(i, config)
-
-  try:
-    # This forces a tracing of `parse`, and raises if the parsing config
-    # is not valid.
-    parse.get_concrete_function()
-
-  except (ValueError, TypeError) as err:
-    raise ValueError("Unable to create a valid parsing config from the "
-                     "provided schema's tensor representation: {}. Due to the "
-                     "following error: {}".format(schema, err))
+    """Batches serialized protos bytes and decode them into an Arrow table."""
+
+    def __init__(
+        self, schema: Optional[schema_pb2.Schema], raw_record_column_name: Optional[str]
+    ):
+        """Initializer."""
+        self._serialized_schema = None
+        if schema is not None:
+            # Serialize to avoid storing TFMD protos. See b/167128119 for the reason.
+            self._serialized_schema = schema.SerializeToString()
+        self._raw_record_column_name = raw_record_column_name
+        self._decoder = None
+
+    def setup(self):
+        if self._serialized_schema:
+            self._decoder = example_coder.ExamplesToRecordBatchDecoder(
+                self._serialized_schema
+            )
+        else:
+            self._decoder = example_coder.ExamplesToRecordBatchDecoder()
+
+    def process(self, examples: List[bytes]):
+        if not self._decoder:
+            raise ValueError("Decoder uninitialized. Run setup() first.")
+        decoded = self._decoder.DecodeBatch(examples)
+        if self._raw_record_column_name is None:
+            yield decoded
+        else:
+            yield record_based_tfxio.AppendRawRecordColumn(
+                decoded, self._raw_record_column_name, examples
+            )
+
+
+def _validate_tf_example_parser_config(
+    config: Dict[str, Any], schema: schema_pb2.Schema
+) -> None:
+    """Validate a tf_example_parse_config by tracing parse_example."""
+
+    # TODO(b/173738031): We would have used a tf.io.validate_parsing_config() if
+    # it existed.
+    @tf.function(input_signature=[tf.TensorSpec(shape=[None], dtype=tf.string)])
+    def parse(i):
+        tf.io.parse_example(i, config)
+
+    try:
+        # This forces a tracing of `parse`, and raises if the parsing config
+        # is not valid.
+        parse.get_concrete_function()
+
+    except (ValueError, TypeError) as err:
+        raise ValueError(
+            "Unable to create a valid parsing config from the "
+            f"provided schema's tensor representation: {schema}. Due to the "
+            f"following error: {err}"
+        )
 
 
 def _is_multi_column_parser_config(parser_config):
-  return isinstance(parser_config, (tf.io.SparseFeature, tf.io.RaggedFeature))
+    return isinstance(parser_config, (tf.io.SparseFeature, tf.io.RaggedFeature))
diff --git a/tfx_bsl/tfxio/tf_example_record_test.py b/tfx_bsl/tfxio/tf_example_record_test.py
index 1e4542dc..c2733053 100644
--- a/tfx_bsl/tfxio/tf_example_record_test.py
+++ b/tfx_bsl/tfxio/tf_example_record_test.py
@@ -15,27 +15,24 @@
 
 import os
 import unittest
-import pytest
 
-from absl import flags
 import apache_beam as beam
-from apache_beam.testing import util as beam_testing_util
 import numpy as np
 import pyarrow as pa
+import pytest
 import tensorflow as tf
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import telemetry_test_util
-from tfx_bsl.tfxio import tf_example_record
-
+from absl import flags
+from absl.testing import absltest, parameterized
+from apache_beam.testing import util as beam_testing_util
 from google.protobuf import text_format
-from absl.testing import absltest
-from absl.testing import parameterized
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.tfxio import dataset_options, telemetry_test_util, tf_example_record
 
 FLAGS = flags.FLAGS
 
-_SCHEMA = text_format.Parse("""
+_SCHEMA = text_format.Parse(
+    """
   feature {
     name: "int_feature"
     type: INT
@@ -60,18 +57,20 @@
       max: 2
     }
   }
-""", schema_pb2.Schema())
+""",
+    schema_pb2.Schema(),
+)
 
 _TELEMETRY_DESCRIPTORS = ["Some", "Component"]
 
 _IS_LEGACY_SCHEMA = (
-    "generate_legacy_feature_spec" in
-    schema_pb2.Schema.DESCRIPTOR.fields_by_name)
+    "generate_legacy_feature_spec" in schema_pb2.Schema.DESCRIPTOR.fields_by_name
+)
 
 # Enforce a consistent behavior in inferring TensorRepresentations from the
 # schema.
 if _IS_LEGACY_SCHEMA:
-  _SCHEMA.generate_legacy_feature_spec = False
+    _SCHEMA.generate_legacy_feature_spec = False
 
 _EXAMPLES = [
     """
@@ -139,319 +138,349 @@
 
 
 def CreateExamplesAsTensors():
-  if tf.executing_eagerly():
-    sparse_tensor_factory = tf.SparseTensor
-  else:
-    sparse_tensor_factory = tf.compat.v1.SparseTensorValue
-
-  return [{
-      "int_feature":
-          sparse_tensor_factory(
-              values=[1], indices=[[0, 0]], dense_shape=[1, 1]),
-      "float_feature":
-          sparse_tensor_factory(
-              values=[1.0, 2.0, 3.0, 4.0],
-              indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
-              dense_shape=[1, 4]),
-      "string_feature":
-          sparse_tensor_factory(
-              values=[], indices=np.empty((0, 2)), dense_shape=[1, 0])
-  }, {
-      "int_feature":
-          sparse_tensor_factory(
-              values=[2], indices=[[0, 0]], dense_shape=[1, 1]),
-      "float_feature":
-          sparse_tensor_factory(
-              values=[2.0, 3.0, 4.0, 5.0],
-              indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
-              dense_shape=[1, 4]),
-      "string_feature":
-          sparse_tensor_factory(
-              values=[b"foo", b"bar"],
-              indices=[[0, 0], [0, 1]],
-              dense_shape=[1, 2])
-  }, {
-      "int_feature":
-          sparse_tensor_factory(
-              values=[3], indices=[[0, 0]], dense_shape=[1, 1]),
-      "float_feature":
-          sparse_tensor_factory(
-              values=[4.0, 5.0, 6.0, 7.0],
-              indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
-              dense_shape=[1, 4]),
-      "string_feature":
-          sparse_tensor_factory(
-              values=[], indices=np.empty((0, 2)), dense_shape=[1, 0])
-  }]
+    if tf.executing_eagerly():
+        sparse_tensor_factory = tf.SparseTensor
+    else:
+        sparse_tensor_factory = tf.compat.v1.SparseTensorValue
+
+    return [
+        {
+            "int_feature": sparse_tensor_factory(
+                values=[1], indices=[[0, 0]], dense_shape=[1, 1]
+            ),
+            "float_feature": sparse_tensor_factory(
+                values=[1.0, 2.0, 3.0, 4.0],
+                indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
+                dense_shape=[1, 4],
+            ),
+            "string_feature": sparse_tensor_factory(
+                values=[], indices=np.empty((0, 2)), dense_shape=[1, 0]
+            ),
+        },
+        {
+            "int_feature": sparse_tensor_factory(
+                values=[2], indices=[[0, 0]], dense_shape=[1, 1]
+            ),
+            "float_feature": sparse_tensor_factory(
+                values=[2.0, 3.0, 4.0, 5.0],
+                indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
+                dense_shape=[1, 4],
+            ),
+            "string_feature": sparse_tensor_factory(
+                values=[b"foo", b"bar"], indices=[[0, 0], [0, 1]], dense_shape=[1, 2]
+            ),
+        },
+        {
+            "int_feature": sparse_tensor_factory(
+                values=[3], indices=[[0, 0]], dense_shape=[1, 1]
+            ),
+            "float_feature": sparse_tensor_factory(
+                values=[4.0, 5.0, 6.0, 7.0],
+                indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
+                dense_shape=[1, 4],
+            ),
+            "string_feature": sparse_tensor_factory(
+                values=[], indices=np.empty((0, 2)), dense_shape=[1, 0]
+            ),
+        },
+    ]
 
 
 _EXAMPLES_AS_TENSORS = CreateExamplesAsTensors()
 
 
 _EXPECTED_COLUMN_VALUES = {
-    "int_feature":
-        pa.array([[1], [2], [3]], type=pa.large_list(pa.int64())),
-    "float_feature":
-        pa.array([[1, 2, 3, 4], [2, 3, 4, 5], [4, 5, 6, 7]],
-                 type=pa.large_list(pa.float32())),
-    "string_feature":
-        pa.array([None, ["foo", "bar"], None],
-                 type=pa.large_list(pa.large_binary())),
+    "int_feature": pa.array([[1], [2], [3]], type=pa.large_list(pa.int64())),
+    "float_feature": pa.array(
+        [[1, 2, 3, 4], [2, 3, 4, 5], [4, 5, 6, 7]], type=pa.large_list(pa.float32())
+    ),
+    "string_feature": pa.array(
+        [None, ["foo", "bar"], None], type=pa.large_list(pa.large_binary())
+    ),
 }
 
 
 def _WriteInputs(filename):
-  with tf.io.TFRecordWriter(filename, "GZIP") as w:
-    for s in _SERIALIZED_EXAMPLES:
-      w.write(s)
+    with tf.io.TFRecordWriter(filename, "GZIP") as w:
+        for s in _SERIALIZED_EXAMPLES:
+            w.write(s)
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class TfExampleRecordTest(tf.test.TestCase, parameterized.TestCase):
-
-  @classmethod
-  def setUpClass(cls):
-    super().setUpClass()
-    cls._example_file = os.path.join(
-        FLAGS.test_tmpdir, "tfexamplerecordtest", "input.recordio.gz")
-    tf.io.gfile.makedirs(os.path.dirname(cls._example_file))
-    _WriteInputs(cls._example_file)
-
-  def _MakeTFXIO(self, schema, raw_record_column_name=None):
-    return tf_example_record.TFExampleRecord(
-        self._example_file, schema=schema,
-        raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=_TELEMETRY_DESCRIPTORS)
-
-  def _ValidateRecordBatch(
-      self, tfxio, record_batch, raw_record_column_name=None):
-    self.assertIsInstance(record_batch, pa.RecordBatch)
-    self.assertEqual(record_batch.num_rows, 3)
-    for i, field in enumerate(record_batch.schema):
-      if field.name == raw_record_column_name:
-        continue
-      self.assertTrue(record_batch.column(i).equals(
-          _EXPECTED_COLUMN_VALUES[field.name]),
-                      "Column {} did not match ({} vs {})."
-                      .format(field.name, record_batch.column(i),
-                              _EXPECTED_COLUMN_VALUES[field.name]))
-
-    if raw_record_column_name is not None:
-      self.assertEqual(record_batch.schema.names[-1], raw_record_column_name)
-      self.assertTrue(record_batch.columns[-1].type.equals(
-          pa.large_list(pa.large_binary())))
-      self.assertEqual(record_batch.columns[-1].flatten().to_pylist(),
-                       _SERIALIZED_EXAMPLES)
-
-  def _AssertSparseTensorEqual(self, lhs, rhs):
-    self.assertAllEqual(lhs.values, rhs.values)
-    self.assertAllEqual(lhs.indices, rhs.indices)
-    self.assertAllEqual(lhs.dense_shape, rhs.dense_shape)
-
-  def testImplicitTensorRepresentations(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    self.assertEqual(
-        {
-            "int_feature": text_format.Parse(
-                """varlen_sparse_tensor { column_name: "int_feature" }""",
-                schema_pb2.TensorRepresentation()),
-            "float_feature": text_format.Parse(
-                """varlen_sparse_tensor { column_name: "float_feature" }""",
-                schema_pb2.TensorRepresentation()),
-            "string_feature": text_format.Parse(
-                """varlen_sparse_tensor { column_name: "string_feature" }""",
-                schema_pb2.TensorRepresentation()),
-        }, tfxio.TensorRepresentations())
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(tfxio, record_batch)
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 3)
-      self.assertIn("int_feature", dict_of_tensors)
-      self.assertIn("float_feature", dict_of_tensors)
-      self.assertIn("string_feature", dict_of_tensors)
-
-    p = beam.Pipeline()
-    record_batch_pcoll = p | tfxio.BeamSource(batch_size=1000)
-    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-    pipeline_result = p.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(
-        self, pipeline_result, _TELEMETRY_DESCRIPTORS,
-        "tf_example", "tfrecords_gzip")
-
-  def testExplicitTensorRepresentations(self):
-    schema = schema_pb2.Schema()
-    schema.CopyFrom(_SCHEMA)
-    tensor_representations = {
-        "my_feature":
-            text_format.Parse("""
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls._example_file = os.path.join(
+            FLAGS.test_tmpdir, "tfexamplerecordtest", "input.recordio.gz"
+        )
+        tf.io.gfile.makedirs(os.path.dirname(cls._example_file))
+        _WriteInputs(cls._example_file)
+
+    def _MakeTFXIO(self, schema, raw_record_column_name=None):
+        return tf_example_record.TFExampleRecord(
+            self._example_file,
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+        )
+
+    def _ValidateRecordBatch(self, tfxio, record_batch, raw_record_column_name=None):
+        self.assertIsInstance(record_batch, pa.RecordBatch)
+        self.assertEqual(record_batch.num_rows, 3)
+        for i, field in enumerate(record_batch.schema):
+            if field.name == raw_record_column_name:
+                continue
+            self.assertTrue(
+                record_batch.column(i).equals(_EXPECTED_COLUMN_VALUES[field.name]),
+                f"Column {field.name} did not match ({record_batch.column(i)} vs {_EXPECTED_COLUMN_VALUES[field.name]}).",
+            )
+
+        if raw_record_column_name is not None:
+            self.assertEqual(record_batch.schema.names[-1], raw_record_column_name)
+            self.assertTrue(
+                record_batch.columns[-1].type.equals(pa.large_list(pa.large_binary()))
+            )
+            self.assertEqual(
+                record_batch.columns[-1].flatten().to_pylist(), _SERIALIZED_EXAMPLES
+            )
+
+    def _AssertSparseTensorEqual(self, lhs, rhs):
+        self.assertAllEqual(lhs.values, rhs.values)
+        self.assertAllEqual(lhs.indices, rhs.indices)
+        self.assertAllEqual(lhs.dense_shape, rhs.dense_shape)
+
+    def testImplicitTensorRepresentations(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        self.assertEqual(
+            {
+                "int_feature": text_format.Parse(
+                    """varlen_sparse_tensor { column_name: "int_feature" }""",
+                    schema_pb2.TensorRepresentation(),
+                ),
+                "float_feature": text_format.Parse(
+                    """varlen_sparse_tensor { column_name: "float_feature" }""",
+                    schema_pb2.TensorRepresentation(),
+                ),
+                "string_feature": text_format.Parse(
+                    """varlen_sparse_tensor { column_name: "string_feature" }""",
+                    schema_pb2.TensorRepresentation(),
+                ),
+            },
+            tfxio.TensorRepresentations(),
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(tfxio, record_batch)
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 3)
+            self.assertIn("int_feature", dict_of_tensors)
+            self.assertIn("float_feature", dict_of_tensors)
+            self.assertIn("string_feature", dict_of_tensors)
+
+        p = beam.Pipeline()
+        record_batch_pcoll = p | tfxio.BeamSource(batch_size=1000)
+        beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+        pipeline_result = p.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self,
+            pipeline_result,
+            _TELEMETRY_DESCRIPTORS,
+            "tf_example",
+            "tfrecords_gzip",
+        )
+
+    def testExplicitTensorRepresentations(self):
+        schema = schema_pb2.Schema()
+        schema.CopyFrom(_SCHEMA)
+        tensor_representations = {
+            "my_feature": text_format.Parse(
+                """
             dense_tensor {
              column_name: "string_feature"
              shape { dim { size: 2 } }
              default_value { bytes_value: "zzz" }
-           }""", schema_pb2.TensorRepresentation())
-    }
-    schema.tensor_representation_group[""].CopyFrom(
-        schema_pb2.TensorRepresentationGroup(
-            tensor_representation=tensor_representations))
+           }""",
+                schema_pb2.TensorRepresentation(),
+            )
+        }
+        schema.tensor_representation_group[""].CopyFrom(
+            schema_pb2.TensorRepresentationGroup(
+                tensor_representation=tensor_representations
+            )
+        )
 
-    tfxio = self._MakeTFXIO(schema)
+        tfxio = self._MakeTFXIO(schema)
 
-    expected_tensor_representations = {
-        "int_feature":
-            text_format.Parse(
+        expected_tensor_representations = {
+            "int_feature": text_format.Parse(
                 """varlen_sparse_tensor { column_name: "int_feature"}""",
-                schema_pb2.TensorRepresentation()),
-        "float_feature":
-            text_format.Parse(
+                schema_pb2.TensorRepresentation(),
+            ),
+            "float_feature": text_format.Parse(
                 """varlen_sparse_tensor { column_name: "float_feature"}""",
-                schema_pb2.TensorRepresentation()),
-    }
-    expected_tensor_representations.update(tensor_representations)
-    self.assertEqual(expected_tensor_representations,
-                     tfxio.TensorRepresentations())
-
-  def testProjection(self):
-    schema = schema_pb2.Schema()
-    schema.CopyFrom(_SCHEMA)
-    tensor_representations = {
-        "dense_string":
-            text_format.Parse(
+                schema_pb2.TensorRepresentation(),
+            ),
+        }
+        expected_tensor_representations.update(tensor_representations)
+        self.assertEqual(expected_tensor_representations, tfxio.TensorRepresentations())
+
+    def testProjection(self):
+        schema = schema_pb2.Schema()
+        schema.CopyFrom(_SCHEMA)
+        tensor_representations = {
+            "dense_string": text_format.Parse(
                 """dense_tensor {
              column_name: "string_feature"
              shape { dim { size: 2 } }
              default_value { bytes_value: "zzz" }
-           }""", schema_pb2.TensorRepresentation()),
-        "varlen_int":
-            text_format.Parse(
+           }""",
+                schema_pb2.TensorRepresentation(),
+            ),
+            "varlen_int": text_format.Parse(
                 """varlen_sparse_tensor {
              column_name: "int_feature"
-           }""", schema_pb2.TensorRepresentation()),
-        "varlen_float":
-            text_format.Parse(
+           }""",
+                schema_pb2.TensorRepresentation(),
+            ),
+            "varlen_float": text_format.Parse(
                 """varlen_sparse_tensor {
              column_name: "float_feature"
-           }""", schema_pb2.TensorRepresentation()),
-    }
-    schema.tensor_representation_group[""].CopyFrom(
-        schema_pb2.TensorRepresentationGroup(
-            tensor_representation=tensor_representations))
-
-    tfxio = self._MakeTFXIO(schema)
-    self.assertEqual(tensor_representations, tfxio.TensorRepresentations())
-
-    projected_tfxio = tfxio.Project(
-        ["dense_string", "varlen_int", "varlen_float"])
-    self.assertEqual(tensor_representations,
-                     projected_tfxio.TensorRepresentations())
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(tfxio, record_batch)
-      expected_schema = projected_tfxio.ArrowSchema()
-      self.assertTrue(
-          record_batch.schema.equals(expected_schema),
-          "actual: {}; expected: {}".format(
-              record_batch.schema, expected_schema))
-      tensor_adapter = projected_tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 3)
-      self.assertIn("dense_string", dict_of_tensors)
-      self.assertIn("varlen_int", dict_of_tensors)
-      self.assertIn("varlen_float", dict_of_tensors)
-
-    with beam.Pipeline() as p:
-      # Setting the betch_size to make sure only one batch is generated.
-      record_batch_pcoll = p | projected_tfxio.BeamSource(
-          batch_size=len(_EXAMPLES))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testAttachRawRecordColumn(self):
-    raw_example_column_name = "raw_records"
-    tfxio = self._MakeTFXIO(_SCHEMA, raw_example_column_name)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      self._ValidateRecordBatch(tfxio, record_batch, raw_example_column_name)
-
-    with beam.Pipeline() as p:
-      # Setting the batch_size to make sure only one batch is generated.
-      record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_EXAMPLES))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
-  def testRecordBatches(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    options = dataset_options.RecordBatchesOptions(
-        batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1)
-    for record_batch in tfxio.RecordBatches(options):
-      self._ValidateRecordBatch(tfxio, record_batch)
-
-  @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
-  def testRecordBatchesWithRawRecords(self):
-    raw_example_column_name = "raw_records"
-    tfxio = self._MakeTFXIO(_SCHEMA, raw_example_column_name)
-    options = dataset_options.RecordBatchesOptions(
-        batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1)
-    for record_batch in tfxio.RecordBatches(options):
-      self._ValidateRecordBatch(tfxio, record_batch, raw_example_column_name)
-
-  @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
-  def testRecordBatchesWithProject(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    feature_name = "string_feature"
-    projected_tfxio = tfxio.Project([feature_name])
-    options = dataset_options.RecordBatchesOptions(
-        batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1)
-    for record_batch in projected_tfxio.RecordBatches(options):
-      self._ValidateRecordBatch(projected_tfxio, record_batch)
-      self.assertIn(feature_name, record_batch.schema.names)
-      self.assertLen(record_batch.schema.names, 1)
-
-  @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
-  def testTensorFlowDataset(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1)
-    for i, parsed_examples_dict in enumerate(
-        tfxio.TensorFlowDataset(options=options)):
-      self.assertLen(parsed_examples_dict, 3)
-      for tensor_name, tensor in parsed_examples_dict.items():
-        self._AssertSparseTensorEqual(
-            tensor, _EXAMPLES_AS_TENSORS[i][tensor_name])
-
-  def testTensorFlowDatasetGraphMode(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1)
-    with tf.compat.v1.Graph().as_default():
-      ds = tfxio.TensorFlowDataset(options=options)
-      iterator = tf.compat.v1.data.make_one_shot_iterator(ds)
-      next_elem = iterator.get_next()
-      records = []
-      with tf.compat.v1.Session() as sess:
-        while True:
-          try:
-            records.append(sess.run(next_elem))
-          except tf.errors.OutOfRangeError:
-            break
-    for i, parsed_examples_dict in enumerate(records):
-      self.assertLen(parsed_examples_dict, 3)
-      for tensor_name, tensor in parsed_examples_dict.items():
-        self._AssertSparseTensorEqual(
-            tensor, _EXAMPLES_AS_TENSORS[i][tensor_name])
-
-  @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
-  def testTensorFlowDatasetWithTensorRepresentation(self):
-    schema = text_format.Parse("""
+           }""",
+                schema_pb2.TensorRepresentation(),
+            ),
+        }
+        schema.tensor_representation_group[""].CopyFrom(
+            schema_pb2.TensorRepresentationGroup(
+                tensor_representation=tensor_representations
+            )
+        )
+
+        tfxio = self._MakeTFXIO(schema)
+        self.assertEqual(tensor_representations, tfxio.TensorRepresentations())
+
+        projected_tfxio = tfxio.Project(["dense_string", "varlen_int", "varlen_float"])
+        self.assertEqual(
+            tensor_representations, projected_tfxio.TensorRepresentations()
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(tfxio, record_batch)
+            expected_schema = projected_tfxio.ArrowSchema()
+            self.assertTrue(
+                record_batch.schema.equals(expected_schema),
+                f"actual: {record_batch.schema}; expected: {expected_schema}",
+            )
+            tensor_adapter = projected_tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 3)
+            self.assertIn("dense_string", dict_of_tensors)
+            self.assertIn("varlen_int", dict_of_tensors)
+            self.assertIn("varlen_float", dict_of_tensors)
+
+        with beam.Pipeline() as p:
+            # Setting the betch_size to make sure only one batch is generated.
+            record_batch_pcoll = p | projected_tfxio.BeamSource(
+                batch_size=len(_EXAMPLES)
+            )
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testAttachRawRecordColumn(self):
+        raw_example_column_name = "raw_records"
+        tfxio = self._MakeTFXIO(_SCHEMA, raw_example_column_name)
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            self._ValidateRecordBatch(tfxio, record_batch, raw_example_column_name)
+
+        with beam.Pipeline() as p:
+            # Setting the batch_size to make sure only one batch is generated.
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_EXAMPLES))
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
+    def testRecordBatches(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        options = dataset_options.RecordBatchesOptions(
+            batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1
+        )
+        for record_batch in tfxio.RecordBatches(options):
+            self._ValidateRecordBatch(tfxio, record_batch)
+
+    @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
+    def testRecordBatchesWithRawRecords(self):
+        raw_example_column_name = "raw_records"
+        tfxio = self._MakeTFXIO(_SCHEMA, raw_example_column_name)
+        options = dataset_options.RecordBatchesOptions(
+            batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1
+        )
+        for record_batch in tfxio.RecordBatches(options):
+            self._ValidateRecordBatch(tfxio, record_batch, raw_example_column_name)
+
+    @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
+    def testRecordBatchesWithProject(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        feature_name = "string_feature"
+        projected_tfxio = tfxio.Project([feature_name])
+        options = dataset_options.RecordBatchesOptions(
+            batch_size=len(_EXAMPLES), shuffle=False, num_epochs=1
+        )
+        for record_batch in projected_tfxio.RecordBatches(options):
+            self._ValidateRecordBatch(projected_tfxio, record_batch)
+            self.assertIn(feature_name, record_batch.schema.names)
+            self.assertLen(record_batch.schema.names, 1)
+
+    @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
+    def testTensorFlowDataset(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1
+        )
+        for i, parsed_examples_dict in enumerate(
+            tfxio.TensorFlowDataset(options=options)
+        ):
+            self.assertLen(parsed_examples_dict, 3)
+            for tensor_name, tensor in parsed_examples_dict.items():
+                self._AssertSparseTensorEqual(
+                    tensor, _EXAMPLES_AS_TENSORS[i][tensor_name]
+                )
+
+    def testTensorFlowDatasetGraphMode(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1
+        )
+        with tf.compat.v1.Graph().as_default():
+            ds = tfxio.TensorFlowDataset(options=options)
+            iterator = tf.compat.v1.data.make_one_shot_iterator(ds)
+            next_elem = iterator.get_next()
+            records = []
+            with tf.compat.v1.Session() as sess:
+                while True:
+                    try:
+                        records.append(sess.run(next_elem))
+                    except tf.errors.OutOfRangeError:
+                        break
+        for i, parsed_examples_dict in enumerate(records):
+            self.assertLen(parsed_examples_dict, 3)
+            for tensor_name, tensor in parsed_examples_dict.items():
+                self._AssertSparseTensorEqual(
+                    tensor, _EXAMPLES_AS_TENSORS[i][tensor_name]
+                )
+
+    @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
+    def testTensorFlowDatasetWithTensorRepresentation(self):
+        schema = text_format.Parse(
+            """
       feature {
         name: "int_feature"
         type: INT
@@ -489,21 +518,27 @@ def testTensorFlowDatasetWithTensorRepresentation(self):
       }
     }
   }
-    """, schema_pb2.Schema())
-    tfxio = self._MakeTFXIO(schema)
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1)
-    for i, parsed_examples_dict in enumerate(
-        tfxio.TensorFlowDataset(options=options)):
-      self.assertLen(parsed_examples_dict, 3)
-      for name in ["var_len_feature", "int_feature", "float_feature"]:
-        self.assertIn(name, parsed_examples_dict)
-      self._AssertSparseTensorEqual(parsed_examples_dict["var_len_feature"],
-                                    _EXAMPLES_AS_TENSORS[i]["string_feature"])
-
-  def testTensorFlowDatasetWithRaggedTensorRepresentation(self):
-    schema = text_format.Parse(
-        """
+    """,
+            schema_pb2.Schema(),
+        )
+        tfxio = self._MakeTFXIO(schema)
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1
+        )
+        for i, parsed_examples_dict in enumerate(
+            tfxio.TensorFlowDataset(options=options)
+        ):
+            self.assertLen(parsed_examples_dict, 3)
+            for name in ["var_len_feature", "int_feature", "float_feature"]:
+                self.assertIn(name, parsed_examples_dict)
+            self._AssertSparseTensorEqual(
+                parsed_examples_dict["var_len_feature"],
+                _EXAMPLES_AS_TENSORS[i]["string_feature"],
+            )
+
+    def testTensorFlowDatasetWithRaggedTensorRepresentation(self):
+        schema = text_format.Parse(
+            """
       feature {
         name: "varlen_feature"
         type: INT
@@ -526,115 +561,134 @@ def testTensorFlowDatasetWithRaggedTensorRepresentation(self):
           }
         }
       }
-    """, schema_pb2.Schema())
-    tfxio = self._MakeTFXIO(schema)
-    projected_tfxio = tfxio.Project(["ragged"])
-
-    expected_column_values = {
-        "varlen_feature":
-            pa.array([[1, 2, 3], [4], [5, 6]], type=pa.large_list(pa.int64())),
-        "row_lengths":
-            pa.array([[2, 1], [1], [1, 1]], type=pa.large_list(pa.int64())),
-    }
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-
-      self.assertIsInstance(record_batch, pa.RecordBatch)
-      self.assertEqual(record_batch.num_rows, 3)
-      print(record_batch.schema)
-      for i, field in enumerate(record_batch.schema):
-        self.assertTrue(
-            record_batch.column(i).equals(expected_column_values[field.name]),
-            "Column {} did not match ({} vs {}).".format(
-                field.name, record_batch.column(i),
-                expected_column_values[field.name]))
-
-      # self._ValidateRecordBatch(tfxio, record_batch)
-      expected_schema = projected_tfxio.ArrowSchema()
-      self.assertTrue(
-          record_batch.schema.equals(expected_schema),
-          "actual: {}; expected: {}".format(record_batch.schema,
-                                            expected_schema))
-      tensor_adapter = projected_tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 1)
-      self.assertIn("ragged", dict_of_tensors)
-
-      if tf.executing_eagerly():
-        ragged_factory = tf.RaggedTensor.from_row_splits
-      else:
-        ragged_factory = tf.compat.v1.ragged.RaggedTensorValue
-      expected_tensor = ragged_factory(
-          values=ragged_factory(
-              values=[1, 2, 3, 4, 5, 6], row_splits=[0, 2, 3, 4, 5, 6]),
-          row_splits=[0, 2, 3, 5])
-      self.assertAllEqual(dict_of_tensors["ragged"], expected_tensor)
-
-    with beam.Pipeline() as p:
-      # Setting the betch_size to make sure only one batch is generated.
-      record_batch_pcoll = p | projected_tfxio.BeamSource(
-          batch_size=len(_EXAMPLES))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-    if tf.executing_eagerly():
-      ragged_factory = tf.RaggedTensor.from_row_splits
-    else:
-      ragged_factory = tf.compat.v1.ragged.RaggedTensorValue
-
-    expected_tensors = [
-        ragged_factory(
-            values=ragged_factory(values=[1, 2, 3], row_splits=[0, 2, 3]),
-            row_splits=[0, 2]),
-        ragged_factory(
-            values=ragged_factory(values=[4], row_splits=[0, 1]),
-            row_splits=[0, 1]),
-        ragged_factory(
-            values=ragged_factory(values=[5, 6], row_splits=[0, 1, 2]),
-            row_splits=[0, 2]),
-    ]
+    """,
+            schema_pb2.Schema(),
+        )
+        tfxio = self._MakeTFXIO(schema)
+        projected_tfxio = tfxio.Project(["ragged"])
+
+        expected_column_values = {
+            "varlen_feature": pa.array(
+                [[1, 2, 3], [4], [5, 6]], type=pa.large_list(pa.int64())
+            ),
+            "row_lengths": pa.array(
+                [[2, 1], [1], [1, 1]], type=pa.large_list(pa.int64())
+            ),
+        }
 
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1)
-    for i, parsed_examples_dict in enumerate(
-        projected_tfxio.TensorFlowDataset(options)):
-      self.assertLen(parsed_examples_dict, 1)
-      self.assertIn("ragged", parsed_examples_dict)
-      self.assertAllEqual(parsed_examples_dict["ragged"], expected_tensors[i])
-
-  @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
-  def testTensorFlowDatasetWithLabelKey(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1, label_key="string_feature")
-    for i, (parsed_examples_dict, label_feature) in enumerate(
-        tfxio.TensorFlowDataset(options=options)):
-      self._AssertSparseTensorEqual(
-          label_feature, _EXAMPLES_AS_TENSORS[i]["string_feature"])
-      self.assertLen(parsed_examples_dict, 2)
-      for tensor_name, tensor in parsed_examples_dict.items():
-        self._AssertSparseTensorEqual(
-            tensor, _EXAMPLES_AS_TENSORS[i][tensor_name])
-
-  @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
-  def testProjectedTensorFlowDataset(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    feature_name = "string_feature"
-    projected_tfxio = tfxio.Project([feature_name])
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1)
-    for i, parsed_examples_dict in enumerate(
-        projected_tfxio.TensorFlowDataset(options=options)):
-      self.assertIn(feature_name, parsed_examples_dict)
-      self.assertLen(parsed_examples_dict, 1)
-      self._AssertSparseTensorEqual(parsed_examples_dict[feature_name],
-                                    _EXAMPLES_AS_TENSORS[i][feature_name])
-
-  @parameterized.named_parameters(*[
-      dict(
-          testcase_name="same_feature_name",
-          schema_pbtxt="""
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+
+            self.assertIsInstance(record_batch, pa.RecordBatch)
+            self.assertEqual(record_batch.num_rows, 3)
+            print(record_batch.schema)
+            for i, field in enumerate(record_batch.schema):
+                self.assertTrue(
+                    record_batch.column(i).equals(expected_column_values[field.name]),
+                    f"Column {field.name} did not match ({record_batch.column(i)} vs {expected_column_values[field.name]}).",
+                )
+
+            # self._ValidateRecordBatch(tfxio, record_batch)
+            expected_schema = projected_tfxio.ArrowSchema()
+            self.assertTrue(
+                record_batch.schema.equals(expected_schema),
+                f"actual: {record_batch.schema}; expected: {expected_schema}",
+            )
+            tensor_adapter = projected_tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 1)
+            self.assertIn("ragged", dict_of_tensors)
+
+            if tf.executing_eagerly():
+                ragged_factory = tf.RaggedTensor.from_row_splits
+            else:
+                ragged_factory = tf.compat.v1.ragged.RaggedTensorValue
+            expected_tensor = ragged_factory(
+                values=ragged_factory(
+                    values=[1, 2, 3, 4, 5, 6], row_splits=[0, 2, 3, 4, 5, 6]
+                ),
+                row_splits=[0, 2, 3, 5],
+            )
+            self.assertAllEqual(dict_of_tensors["ragged"], expected_tensor)
+
+        with beam.Pipeline() as p:
+            # Setting the betch_size to make sure only one batch is generated.
+            record_batch_pcoll = p | projected_tfxio.BeamSource(
+                batch_size=len(_EXAMPLES)
+            )
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+        if tf.executing_eagerly():
+            ragged_factory = tf.RaggedTensor.from_row_splits
+        else:
+            ragged_factory = tf.compat.v1.ragged.RaggedTensorValue
+
+        expected_tensors = [
+            ragged_factory(
+                values=ragged_factory(values=[1, 2, 3], row_splits=[0, 2, 3]),
+                row_splits=[0, 2],
+            ),
+            ragged_factory(
+                values=ragged_factory(values=[4], row_splits=[0, 1]), row_splits=[0, 1]
+            ),
+            ragged_factory(
+                values=ragged_factory(values=[5, 6], row_splits=[0, 1, 2]),
+                row_splits=[0, 2],
+            ),
+        ]
+
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1
+        )
+        for i, parsed_examples_dict in enumerate(
+            projected_tfxio.TensorFlowDataset(options)
+        ):
+            self.assertLen(parsed_examples_dict, 1)
+            self.assertIn("ragged", parsed_examples_dict)
+            self.assertAllEqual(parsed_examples_dict["ragged"], expected_tensors[i])
+
+    @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
+    def testTensorFlowDatasetWithLabelKey(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1, label_key="string_feature"
+        )
+        for i, (parsed_examples_dict, label_feature) in enumerate(
+            tfxio.TensorFlowDataset(options=options)
+        ):
+            self._AssertSparseTensorEqual(
+                label_feature, _EXAMPLES_AS_TENSORS[i]["string_feature"]
+            )
+            self.assertLen(parsed_examples_dict, 2)
+            for tensor_name, tensor in parsed_examples_dict.items():
+                self._AssertSparseTensorEqual(
+                    tensor, _EXAMPLES_AS_TENSORS[i][tensor_name]
+                )
+
+    @unittest.skipIf(not tf.executing_eagerly(), "Skip in non-eager mode.")
+    def testProjectedTensorFlowDataset(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        feature_name = "string_feature"
+        projected_tfxio = tfxio.Project([feature_name])
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1
+        )
+        for i, parsed_examples_dict in enumerate(
+            projected_tfxio.TensorFlowDataset(options=options)
+        ):
+            self.assertIn(feature_name, parsed_examples_dict)
+            self.assertLen(parsed_examples_dict, 1)
+            self._AssertSparseTensorEqual(
+                parsed_examples_dict[feature_name],
+                _EXAMPLES_AS_TENSORS[i][feature_name],
+            )
+
+    @parameterized.named_parameters(
+        *[
+            dict(
+                testcase_name="same_feature_name",
+                schema_pbtxt="""
             feature {
               name: "string_feature"
               type: BYTES
@@ -653,13 +707,14 @@ def testProjectedTensorFlowDataset(self):
               }
             }
           """,
-          expected_parsing_config={
-              "string_feature": tf.io.VarLenFeature(dtype=tf.string)
-          },
-          expected_rename_dict={"string_feature": "string_feature"}),
-      dict(
-          testcase_name="rename_one_feature",
-          schema_pbtxt="""
+                expected_parsing_config={
+                    "string_feature": tf.io.VarLenFeature(dtype=tf.string)
+                },
+                expected_rename_dict={"string_feature": "string_feature"},
+            ),
+            dict(
+                testcase_name="rename_one_feature",
+                schema_pbtxt="""
             feature {
               name: "string_feature"
               type: BYTES
@@ -678,13 +733,14 @@ def testProjectedTensorFlowDataset(self):
               }
             }
           """,
-          expected_parsing_config={
-              "string_feature": tf.io.VarLenFeature(dtype=tf.string)
-          },
-          expected_rename_dict={"string_feature": "var_len_feature_1"}),
-      dict(
-          testcase_name="sparse_feature",
-          schema_pbtxt="""
+                expected_parsing_config={
+                    "string_feature": tf.io.VarLenFeature(dtype=tf.string)
+                },
+                expected_rename_dict={"string_feature": "var_len_feature_1"},
+            ),
+            dict(
+                testcase_name="sparse_feature",
+                schema_pbtxt="""
             feature {
               name: "idx"
               type: INT
@@ -713,20 +769,18 @@ def testProjectedTensorFlowDataset(self):
               }
             }
           """,
-          expected_parsing_config={
-              "_tfx_bsl_sparse_feature_sparse_feature":
-                  tf.io.SparseFeature(
-                      index_key=["idx"],
-                      value_key="val",
-                      size=[1],
-                      dtype=tf.float32)
-          },
-          expected_rename_dict={
-              "_tfx_bsl_sparse_feature_sparse_feature": "sparse_feature"
-          }),
-      dict(
-          testcase_name="sparse_and_varlen_features_shared",
-          schema_pbtxt="""
+                expected_parsing_config={
+                    "_tfx_bsl_sparse_feature_sparse_feature": tf.io.SparseFeature(
+                        index_key=["idx"], value_key="val", size=[1], dtype=tf.float32
+                    )
+                },
+                expected_rename_dict={
+                    "_tfx_bsl_sparse_feature_sparse_feature": "sparse_feature"
+                },
+            ),
+            dict(
+                testcase_name="sparse_and_varlen_features_shared",
+                schema_pbtxt="""
             feature {
               name: "idx"
               type: INT
@@ -763,35 +817,33 @@ def testProjectedTensorFlowDataset(self):
               }
             }
           """,
-          expected_parsing_config={
-              "_tfx_bsl_sparse_feature_sparse_feature":
-                  tf.io.SparseFeature(
-                      index_key=["idx"],
-                      value_key="val",
-                      size=[1],
-                      dtype=tf.float32),
-              "val":
-                  tf.io.VarLenFeature(dtype=tf.float32)
-          },
-          expected_rename_dict={
-              "_tfx_bsl_sparse_feature_sparse_feature": "sparse_feature",
-              "val": "varlen"
-          }),
-  ])
-  def testValidGetTfExampleParserConfig(self, schema_pbtxt,
-                                        expected_parsing_config,
-                                        expected_rename_dict):
-    schema = text_format.Parse(schema_pbtxt, schema_pb2.Schema())
-    tfxio = self._MakeTFXIO(schema)
-
-    parser_config, rename_dict = tfxio._GetTfExampleParserConfig()
-
-    self.assertAllEqual(expected_parsing_config, parser_config)
-    self.assertAllEqual(expected_rename_dict, rename_dict)
-
-  @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-  def testValidGetTfExampleParserConfigWithRaggedFeature(self):
-    schema_pbtxt = """
+                expected_parsing_config={
+                    "_tfx_bsl_sparse_feature_sparse_feature": tf.io.SparseFeature(
+                        index_key=["idx"], value_key="val", size=[1], dtype=tf.float32
+                    ),
+                    "val": tf.io.VarLenFeature(dtype=tf.float32),
+                },
+                expected_rename_dict={
+                    "_tfx_bsl_sparse_feature_sparse_feature": "sparse_feature",
+                    "val": "varlen",
+                },
+            ),
+        ]
+    )
+    def testValidGetTfExampleParserConfig(
+        self, schema_pbtxt, expected_parsing_config, expected_rename_dict
+    ):
+        schema = text_format.Parse(schema_pbtxt, schema_pb2.Schema())
+        tfxio = self._MakeTFXIO(schema)
+
+        parser_config, rename_dict = tfxio._GetTfExampleParserConfig()
+
+        self.assertAllEqual(expected_parsing_config, parser_config)
+        self.assertAllEqual(expected_rename_dict, rename_dict)
+
+    @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
+    def testValidGetTfExampleParserConfigWithRaggedFeature(self):
+        schema_pbtxt = """
       feature {
         name: "row_lengths"
         type: INT
@@ -817,32 +869,33 @@ def testValidGetTfExampleParserConfigWithRaggedFeature(self):
         }
       }
     """
-    schema = text_format.Parse(schema_pbtxt, schema_pb2.Schema())
-    tfxio = self._MakeTFXIO(schema)
+        schema = text_format.Parse(schema_pbtxt, schema_pb2.Schema())
+        tfxio = self._MakeTFXIO(schema)
 
-    parser_config, rename_dict = tfxio._GetTfExampleParserConfig()
+        parser_config, rename_dict = tfxio._GetTfExampleParserConfig()
 
-    expected_parsing_config = {
-        "_tfx_bsl_ragged_feature_ragged_feature":
-            tf.io.RaggedFeature(
+        expected_parsing_config = {
+            "_tfx_bsl_ragged_feature_ragged_feature": tf.io.RaggedFeature(
                 value_key="val",
                 partitions=[
                     tf.io.RaggedFeature.RowLengths("row_lengths"),
                     tf.io.RaggedFeature.UniformRowLength(2),
                 ],
-                dtype=tf.float32),
-    }
-    expected_rename_dict = {
-        "_tfx_bsl_ragged_feature_ragged_feature": "ragged_feature",
-    }
+                dtype=tf.float32,
+            ),
+        }
+        expected_rename_dict = {
+            "_tfx_bsl_ragged_feature_ragged_feature": "ragged_feature",
+        }
 
-    self.assertAllEqual(expected_parsing_config, parser_config)
-    self.assertAllEqual(expected_rename_dict, rename_dict)
+        self.assertAllEqual(expected_parsing_config, parser_config)
+        self.assertAllEqual(expected_rename_dict, rename_dict)
 
-  @parameterized.named_parameters(*[
-      dict(
-          testcase_name="invalid_duplicate_feature",
-          schema_pbtxt="""
+    @parameterized.named_parameters(
+        *[
+            dict(
+                testcase_name="invalid_duplicate_feature",
+                schema_pbtxt="""
             feature {
               name: "string_feature"
               type: BYTES
@@ -873,11 +926,12 @@ def testValidGetTfExampleParserConfigWithRaggedFeature(self):
               }
             }
           """,
-          error=ValueError,
-          error_string="Unable to create a valid parsing config.*"),
-      dict(
-          testcase_name="sparse_and_fixed_feature",
-          schema_pbtxt="""
+                error=ValueError,
+                error_string="Unable to create a valid parsing config.*",
+            ),
+            dict(
+                testcase_name="sparse_and_fixed_feature",
+                schema_pbtxt="""
             feature {
               name: "idx"
               type: INT
@@ -922,57 +976,58 @@ def testValidGetTfExampleParserConfigWithRaggedFeature(self):
               }
             }
           """,
-          error=ValueError,
-          error_string="Unable to create a valid parsing config.*"),
-      dict(
-          testcase_name="no_schema",
-          schema_pbtxt="",
-          error=ValueError,
-          error_string="Unable to create a parsing config because no schema.*"),
-  ])
-  def testInvalidGetTfExampleParserConfig(self, schema_pbtxt, error,
-                                          error_string):
-    if not schema_pbtxt:
-      schema = None
-    else:
-      schema = text_format.Parse(schema_pbtxt, schema_pb2.Schema())
-    tfxio = self._MakeTFXIO(schema)
+                error=ValueError,
+                error_string="Unable to create a valid parsing config.*",
+            ),
+            dict(
+                testcase_name="no_schema",
+                schema_pbtxt="",
+                error=ValueError,
+                error_string="Unable to create a parsing config because no schema.*",
+            ),
+        ]
+    )
+    def testInvalidGetTfExampleParserConfig(self, schema_pbtxt, error, error_string):
+        if not schema_pbtxt:
+            schema = None
+        else:
+            schema = text_format.Parse(schema_pbtxt, schema_pb2.Schema())
+        tfxio = self._MakeTFXIO(schema)
 
-    with self.assertRaisesRegex(error, error_string):
-      tfxio._GetTfExampleParserConfig()
+        with self.assertRaisesRegex(error, error_string):
+            tfxio._GetTfExampleParserConfig()
 
 
 class TFExampleBeamRecordTest(absltest.TestCase):
-
-  @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
-  def testE2E(self):
-    raw_record_column_name = "raw_record"
-    tfxio = tf_example_record.TFExampleBeamRecord(
-        physical_format="inmem",
-        telemetry_descriptors=["some", "component"],
-        schema=_SCHEMA,
-        raw_record_column_name=raw_record_column_name,
-    )
-
-    def _AssertFn(record_batches):
-      self.assertLen(record_batches, 1)
-      record_batch = record_batches[0]
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 3)
-      self.assertIn("int_feature", dict_of_tensors)
-      self.assertIn("float_feature", dict_of_tensors)
-      self.assertIn("string_feature", dict_of_tensors)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (
-          p
-          | "CreateInMemRecords" >> beam.Create(_SERIALIZED_EXAMPLES)
-          | "BeamSource" >>
-          tfxio.BeamSource(batch_size=len(_SERIALIZED_EXAMPLES)))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+    @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
+    def testE2E(self):
+        raw_record_column_name = "raw_record"
+        tfxio = tf_example_record.TFExampleBeamRecord(
+            physical_format="inmem",
+            telemetry_descriptors=["some", "component"],
+            schema=_SCHEMA,
+            raw_record_column_name=raw_record_column_name,
+        )
+
+        def _AssertFn(record_batches):
+            self.assertLen(record_batches, 1)
+            record_batch = record_batches[0]
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 3)
+            self.assertIn("int_feature", dict_of_tensors)
+            self.assertIn("float_feature", dict_of_tensors)
+            self.assertIn("string_feature", dict_of_tensors)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = (
+                p
+                | "CreateInMemRecords" >> beam.Create(_SERIALIZED_EXAMPLES)
+                | "BeamSource" >> tfxio.BeamSource(batch_size=len(_SERIALIZED_EXAMPLES))
+            )
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tfxio/tf_sequence_example_record.py b/tfx_bsl/tfxio/tf_sequence_example_record.py
index 0cb9d4f2..6315a504 100644
--- a/tfx_bsl/tfxio/tf_sequence_example_record.py
+++ b/tfx_bsl/tfxio/tf_sequence_example_record.py
@@ -19,322 +19,346 @@
 import apache_beam as beam
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.arrow import path
-from tfx_bsl.coders import batch_util
-from tfx_bsl.coders import sequence_example_coder
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import dataset_util
-from tfx_bsl.tfxio import record_based_tfxio
-from tfx_bsl.tfxio import tensor_adapter
-from tfx_bsl.tfxio import tensor_representation_util
-from tfx_bsl.tfxio import tfxio
-
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.arrow import path
+from tfx_bsl.coders import batch_util, sequence_example_coder
+from tfx_bsl.tfxio import (
+    dataset_options,
+    dataset_util,
+    record_based_tfxio,
+    tensor_adapter,
+    tensor_representation_util,
+    tfxio,
+)
 
 _SEQUENCE_COLUMN_NAME = "##SEQUENCE##"
 
 
 class _TFSequenceExampleRecordBase(record_based_tfxio.RecordBasedTFXIO):
-  """Base class for TFXIO classes for record based tf.SequenceExamples."""
-
-  def __init__(self,
-               schema: Optional[schema_pb2.Schema],
-               raw_record_column_name: Optional[Text],
-               telemetry_descriptors: List[Text],
-               physical_format: Text):
-    super().__init__(
-        telemetry_descriptors=telemetry_descriptors,
-        raw_record_column_name=raw_record_column_name,
-        logical_format="tf_sequence_example",
-        physical_format=physical_format)
-    self._schema = schema
-
-  @property
-  def schema(self) -> schema_pb2.Schema:
-    if self._schema is None:
-      raise ValueError("Schema is undefined.")
-    return self._schema
-
-  def SupportAttachingRawRecords(self) -> bool:
-    return True
-
-  @abc.abstractmethod
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    """Returns a PTransform that produces PCollection[bytes]."""
-
-  def _RawRecordToRecordBatchInternal(self,
-                                      batch_size: Optional[int] = None
-                                     ) -> beam.PTransform:
-
-    @beam.typehints.with_input_types(bytes)
-    @beam.typehints.with_output_types(pa.RecordBatch)
-    def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
-      return (
-          raw_records_pcoll
-          | "Batch"
-          >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
-          | "Decode"
-          >> beam.ParDo(
-              _DecodeBatchExamplesDoFn(
-                  self._schema, self.raw_record_column_name
-              )
-          )
-      )
-
-    return beam.ptransform_fn(_PTransformFn)()
-
-  def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
-    return sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
-        _SEQUENCE_COLUMN_NAME, self.schema.SerializeToString()
-    ).ArrowSchema()
-
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    return tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
-        self._schema)
-
-  def _ParseRawRecordTensorFlowDataset(
-      self,
-      raw_record_dataset: tf.data.Dataset,
-      label_key: Optional[str] = None) -> tf.data.Dataset:
-    """Parses a dataset of serialized SequenceExamples."""
-
-    context_features, sequence_features = (
-        tensor_representation_util.CreateTfSequenceExampleParserConfig(
-            self._schema))
-
-    # Parse `SequenceExample` tensors to dictionaries of context and sequence
-    # tensors and merge them.
-    def _ParseAndMerge(serialized):
-      context, sequence, _ = tf.io.parse_sequence_example(
-          serialized, context_features, sequence_features)
-      return {**context, **sequence}
-
-    dataset = raw_record_dataset.map(
-        _ParseAndMerge, num_parallel_calls=tf.data.AUTOTUNE)
-
-    if label_key is not None:
-      dataset = self._PopLabelFeatureFromDataset(dataset, label_key)
-
-    return dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
-
-  def _ProjectTfmdSchema(self, tensor_names: List[Text]) -> schema_pb2.Schema:
-    """Projects self._schema by the given tensor names."""
-    tensor_representations = self.TensorRepresentations()
-    tensor_names = set(tensor_names)
-    if not tensor_names.issubset(tensor_representations):
-      raise ValueError(
-          "Unable to project {} because they were not in the original "
-          "TensorRepresentations.".format(tensor_names -
-                                          tensor_representations))
-    used_paths = set()
-    for tensor_name in tensor_names:
-      used_paths.update(
-          tensor_representation_util.GetSourceColumnsFromTensorRepresentation(
-              tensor_representations[tensor_name]))
-    result = schema_pb2.Schema()
-    # Note: We only copy projected features into the new schema because the
-    # coder, and ArrowSchema() only care about Schema.feature. If they start
-    # depending on other Schema fields then those fields must also be projected.
-    for f in self.schema.feature:
-      p = path.ColumnPath(f.name)
-      if f.name == _SEQUENCE_COLUMN_NAME:
-        if f.type != schema_pb2.STRUCT:
-          raise ValueError(
-              "Feature {} was expected to be of type STRUCT, but got {}"
-              .format(f.name, f))
-        result_sequence_struct = schema_pb2.Feature()
-        result_sequence_struct.CopyFrom(f)
-        result_sequence_struct.ClearField("struct_domain")
-        any_sequence_feature_projected = False
-        for sf in f.struct_domain.feature:
-          sequence_feature_path = p.child(sf.name)
-          if sequence_feature_path in used_paths:
-            any_sequence_feature_projected = True
-            result_sequence_struct.struct_domain.feature.add().CopyFrom(sf)
-        if any_sequence_feature_projected:
-          result.feature.add().CopyFrom(result_sequence_struct)
-      elif p in used_paths:
-        result.feature.add().CopyFrom(f)
-
-    tensor_representation_util.SetTensorRepresentationsInSchema(
-        result,
-        {k: v for k, v in tensor_representations.items() if k in tensor_names})
-
-    return result
+    """Base class for TFXIO classes for record based tf.SequenceExamples."""
+
+    def __init__(
+        self,
+        schema: Optional[schema_pb2.Schema],
+        raw_record_column_name: Optional[str],
+        telemetry_descriptors: List[str],
+        physical_format: str,
+    ):
+        super().__init__(
+            telemetry_descriptors=telemetry_descriptors,
+            raw_record_column_name=raw_record_column_name,
+            logical_format="tf_sequence_example",
+            physical_format=physical_format,
+        )
+        self._schema = schema
+
+    @property
+    def schema(self) -> schema_pb2.Schema:
+        if self._schema is None:
+            raise ValueError("Schema is undefined.")
+        return self._schema
+
+    def SupportAttachingRawRecords(self) -> bool:
+        return True
+
+    @abc.abstractmethod
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        """Returns a PTransform that produces PCollection[bytes]."""
+
+    def _RawRecordToRecordBatchInternal(
+        self, batch_size: Optional[int] = None
+    ) -> beam.PTransform:
+        @beam.typehints.with_input_types(bytes)
+        @beam.typehints.with_output_types(pa.RecordBatch)
+        def _PTransformFn(raw_records_pcoll: beam.pvalue.PCollection):
+            return (
+                raw_records_pcoll
+                | "Batch"
+                >> batch_util.BatchRecords(batch_size, self._telemetry_descriptors)
+                | "Decode"
+                >> beam.ParDo(
+                    _DecodeBatchExamplesDoFn(self._schema, self.raw_record_column_name)
+                )
+            )
+
+        return beam.ptransform_fn(_PTransformFn)()
+
+    def _ArrowSchemaNoRawRecordColumn(self) -> pa.Schema:
+        return sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
+            _SEQUENCE_COLUMN_NAME, self.schema.SerializeToString()
+        ).ArrowSchema()
+
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        return tensor_representation_util.InferTensorRepresentationsFromMixedSchema(
+            self._schema
+        )
+
+    def _ParseRawRecordTensorFlowDataset(
+        self, raw_record_dataset: tf.data.Dataset, label_key: Optional[str] = None
+    ) -> tf.data.Dataset:
+        """Parses a dataset of serialized SequenceExamples."""
+        context_features, sequence_features = (
+            tensor_representation_util.CreateTfSequenceExampleParserConfig(self._schema)
+        )
+
+        # Parse `SequenceExample` tensors to dictionaries of context and sequence
+        # tensors and merge them.
+        def _ParseAndMerge(serialized):
+            context, sequence, _ = tf.io.parse_sequence_example(
+                serialized, context_features, sequence_features
+            )
+            return {**context, **sequence}
+
+        dataset = raw_record_dataset.map(
+            _ParseAndMerge, num_parallel_calls=tf.data.AUTOTUNE
+        )
+
+        if label_key is not None:
+            dataset = self._PopLabelFeatureFromDataset(dataset, label_key)
+
+        return dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
+
+    def _ProjectTfmdSchema(self, tensor_names: List[str]) -> schema_pb2.Schema:
+        """Projects self._schema by the given tensor names."""
+        tensor_representations = self.TensorRepresentations()
+        tensor_names = set(tensor_names)
+        if not tensor_names.issubset(tensor_representations):
+            raise ValueError(
+                f"Unable to project {tensor_names - tensor_representations} because they were not in the original "
+                "TensorRepresentations."
+            )
+        used_paths = set()
+        for tensor_name in tensor_names:
+            used_paths.update(
+                tensor_representation_util.GetSourceColumnsFromTensorRepresentation(
+                    tensor_representations[tensor_name]
+                )
+            )
+        result = schema_pb2.Schema()
+        # Note: We only copy projected features into the new schema because the
+        # coder, and ArrowSchema() only care about Schema.feature. If they start
+        # depending on other Schema fields then those fields must also be projected.
+        for f in self.schema.feature:
+            p = path.ColumnPath(f.name)
+            if f.name == _SEQUENCE_COLUMN_NAME:
+                if f.type != schema_pb2.STRUCT:
+                    raise ValueError(
+                        f"Feature {f.name} was expected to be of type STRUCT, but got {f}"
+                    )
+                result_sequence_struct = schema_pb2.Feature()
+                result_sequence_struct.CopyFrom(f)
+                result_sequence_struct.ClearField("struct_domain")
+                any_sequence_feature_projected = False
+                for sf in f.struct_domain.feature:
+                    sequence_feature_path = p.child(sf.name)
+                    if sequence_feature_path in used_paths:
+                        any_sequence_feature_projected = True
+                        result_sequence_struct.struct_domain.feature.add().CopyFrom(sf)
+                if any_sequence_feature_projected:
+                    result.feature.add().CopyFrom(result_sequence_struct)
+            elif p in used_paths:
+                result.feature.add().CopyFrom(f)
+
+        tensor_representation_util.SetTensorRepresentationsInSchema(
+            result,
+            {k: v for k, v in tensor_representations.items() if k in tensor_names},
+        )
+
+        return result
 
 
 class TFSequenceExampleBeamRecord(_TFSequenceExampleRecordBase):
-  """TFXIO implementation for serialized tf.SequenceExamples in pcoll[bytes].
-
-  This is a special TFXIO that does not actually do I/O -- it relies on the
-  caller to prepare a PCollection of bytes (serialized tf.SequenceExamples).
-  """
-
-  def __init__(self,
-               physical_format: Text,
-               telemetry_descriptors: List[Text],
-               schema: Optional[schema_pb2.Schema] = None,
-               raw_record_column_name: Optional[Text] = None):
-    """Initializer.
-
-    Args:
-      physical_format: The physical format that describes where the input
-        pcoll[bytes] comes from. Used for telemetry purposes. Examples: "text",
-        "tfrecord".
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-      schema: A TFMD Schema describing the dataset.
-      raw_record_column_name: If not None, the generated Arrow RecordBatches
-        will contain a column of the given name that contains serialized
-        records.
+    """TFXIO implementation for serialized tf.SequenceExamples in pcoll[bytes].
+
+    This is a special TFXIO that does not actually do I/O -- it relies on the
+    caller to prepare a PCollection of bytes (serialized tf.SequenceExamples).
     """
-    super().__init__(
-        schema=schema, raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=telemetry_descriptors,
-        physical_format=physical_format)
 
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return (beam.ptransform_fn(lambda x: x)()
+    def __init__(
+        self,
+        physical_format: str,
+        telemetry_descriptors: List[str],
+        schema: Optional[schema_pb2.Schema] = None,
+        raw_record_column_name: Optional[str] = None,
+    ):
+        """Initializer.
+
+        Args:
+        ----
+          physical_format: The physical format that describes where the input
+            pcoll[bytes] comes from. Used for telemetry purposes. Examples: "text",
+            "tfrecord".
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+          schema: A TFMD Schema describing the dataset.
+          raw_record_column_name: If not None, the generated Arrow RecordBatches
+            will contain a column of the given name that contains serialized
+            records.
+        """
+        super().__init__(
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=telemetry_descriptors,
+            physical_format=physical_format,
+        )
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return (
+            beam.ptransform_fn(lambda x: x)()
             .with_input_types(bytes)
-            .with_output_types(bytes))
+            .with_output_types(bytes)
+        )
 
-  def _ProjectImpl(self, tensor_names: List[Text]) -> tfxio.TFXIO:
-    projected_schema = self._ProjectTfmdSchema(tensor_names)
-    return TFSequenceExampleBeamRecord(self._physical_format,
-                                       self.telemetry_descriptors,
-                                       projected_schema,
-                                       self.raw_record_column_name)
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        projected_schema = self._ProjectTfmdSchema(tensor_names)
+        return TFSequenceExampleBeamRecord(
+            self._physical_format,
+            self.telemetry_descriptors,
+            projected_schema,
+            self.raw_record_column_name,
+        )
 
-  def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
-    raise NotImplementedError
+    def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
+        raise NotImplementedError
 
-  def TensorFlowDataset(self,
-                        options: dataset_options.TensorFlowDatasetOptions):
-    raise NotImplementedError(
-        "TFExampleBeamRecord is unable to provide a TensorFlowDataset "
-        "because it does not do I/O")
+    def TensorFlowDataset(self, options: dataset_options.TensorFlowDatasetOptions):
+        raise NotImplementedError(
+            "TFExampleBeamRecord is unable to provide a TensorFlowDataset "
+            "because it does not do I/O"
+        )
 
 
 class TFSequenceExampleRecord(_TFSequenceExampleRecordBase):
-  """TFXIO implementation for tf.SequenceExample on TFRecord."""
-
-  def __init__(self,
-               file_pattern: Union[List[Text], Text],
-               telemetry_descriptors: List[Text],
-               validate: bool = True,
-               schema: Optional[schema_pb2.Schema] = None,
-               raw_record_column_name: Optional[Text] = None):
-    """Initializes a TFSequenceExampleRecord TFXIO.
-
-    Args:
-      file_pattern: One or a list of glob patterns. If a list, must not be
-        empty.
-      telemetry_descriptors: A set of descriptors that identify the component
-        that is instantiating this TFXIO. These will be used to construct the
-        namespace to contain metrics for profiling and are therefore expected to
-        be identifiers of the component itself and not individual instances of
-        source use.
-      validate: Not used. do not set. (not used since post 0.22.1).
-      schema: A TFMD Schema describing the dataset.
-      raw_record_column_name: If not None, the generated Arrow RecordBatches
-        will contain a column of the given name that contains serialized
-        records.
-    """
-    super().__init__(
-        schema=schema, raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=telemetry_descriptors,
-        physical_format="tfrecords_gzip")
-    del validate
-    if not isinstance(file_pattern, list):
-      file_pattern = [file_pattern]
-    assert file_pattern, "Must provide at least one file pattern."
-    self._file_pattern = file_pattern
-
-  def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
-    return record_based_tfxio.ReadTfRecord(self._file_pattern)
-
-  def _ProjectImpl(self, tensor_names: List[Text]) -> tfxio.TFXIO:
-    projected_schema = self._ProjectTfmdSchema(tensor_names)
-    return TFSequenceExampleRecord(
-        file_pattern=self._file_pattern,
-        telemetry_descriptors=self.telemetry_descriptors,
-        schema=projected_schema,
-        raw_record_column_name=self.raw_record_column_name)
-
-  def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
-    raise NotImplementedError
-
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    """Creates a tf.data.Dataset that yields Tensors.
-
-    The serialized tf.SequenceExamples are parsed by
-    `tf.io.parse_sequence_example`.
-
-    See base class (tfxio.TFXIO) for more details.
-
-    Args:
-      options: an options object for the tf.data.Dataset. See
-        `dataset_options.TensorFlowDatasetOptions` for more details.
-
-    Returns:
-      A dataset of `dict` elements, (or a tuple of `dict` elements and label).
-      Each `dict` maps feature keys to `Tensor`, `SparseTensor`, or
-      `RaggedTensor` objects.
-
-    Raises:
-      ValueError: if there is something wrong with the provided schema.
-    """
-    file_pattern = tf.convert_to_tensor(self._file_pattern)
-    dataset = dataset_util.make_tf_record_dataset(
-        file_pattern,
-        batch_size=options.batch_size,
-        num_epochs=options.num_epochs,
-        shuffle=options.shuffle,
-        shuffle_buffer_size=options.shuffle_buffer_size,
-        shuffle_seed=options.shuffle_seed,
-        reader_num_threads=options.reader_num_threads,
-        drop_final_batch=options.drop_final_batch)
-
-    return self._ParseRawRecordTensorFlowDataset(dataset, options.label_key)
+    """TFXIO implementation for tf.SequenceExample on TFRecord."""
+
+    def __init__(
+        self,
+        file_pattern: Union[List[str], str],
+        telemetry_descriptors: List[str],
+        validate: bool = True,
+        schema: Optional[schema_pb2.Schema] = None,
+        raw_record_column_name: Optional[str] = None,
+    ):
+        """Initializes a TFSequenceExampleRecord TFXIO.
+
+        Args:
+        ----
+          file_pattern: One or a list of glob patterns. If a list, must not be
+            empty.
+          telemetry_descriptors: A set of descriptors that identify the component
+            that is instantiating this TFXIO. These will be used to construct the
+            namespace to contain metrics for profiling and are therefore expected to
+            be identifiers of the component itself and not individual instances of
+            source use.
+          validate: Not used. do not set. (not used since post 0.22.1).
+          schema: A TFMD Schema describing the dataset.
+          raw_record_column_name: If not None, the generated Arrow RecordBatches
+            will contain a column of the given name that contains serialized
+            records.
+        """
+        super().__init__(
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=telemetry_descriptors,
+            physical_format="tfrecords_gzip",
+        )
+        del validate
+        if not isinstance(file_pattern, list):
+            file_pattern = [file_pattern]
+        assert file_pattern, "Must provide at least one file pattern."
+        self._file_pattern = file_pattern
+
+    def _RawRecordBeamSourceInternal(self) -> beam.PTransform:
+        return record_based_tfxio.ReadTfRecord(self._file_pattern)
+
+    def _ProjectImpl(self, tensor_names: List[str]) -> tfxio.TFXIO:
+        projected_schema = self._ProjectTfmdSchema(tensor_names)
+        return TFSequenceExampleRecord(
+            file_pattern=self._file_pattern,
+            telemetry_descriptors=self.telemetry_descriptors,
+            schema=projected_schema,
+            raw_record_column_name=self.raw_record_column_name,
+        )
+
+    def RecordBatches(self, options: dataset_options.RecordBatchesOptions):
+        raise NotImplementedError
+
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        """Creates a tf.data.Dataset that yields Tensors.
+
+        The serialized tf.SequenceExamples are parsed by
+        `tf.io.parse_sequence_example`.
+
+        See base class (tfxio.TFXIO) for more details.
+
+        Args:
+        ----
+          options: an options object for the tf.data.Dataset. See
+            `dataset_options.TensorFlowDatasetOptions` for more details.
+
+        Returns:
+        -------
+          A dataset of `dict` elements, (or a tuple of `dict` elements and label).
+          Each `dict` maps feature keys to `Tensor`, `SparseTensor`, or
+          `RaggedTensor` objects.
+
+        Raises:
+        ------
+          ValueError: if there is something wrong with the provided schema.
+        """
+        file_pattern = tf.convert_to_tensor(self._file_pattern)
+        dataset = dataset_util.make_tf_record_dataset(
+            file_pattern,
+            batch_size=options.batch_size,
+            num_epochs=options.num_epochs,
+            shuffle=options.shuffle,
+            shuffle_buffer_size=options.shuffle_buffer_size,
+            shuffle_seed=options.shuffle_seed,
+            reader_num_threads=options.reader_num_threads,
+            drop_final_batch=options.drop_final_batch,
+        )
+
+        return self._ParseRawRecordTensorFlowDataset(dataset, options.label_key)
 
 
 @beam.typehints.with_input_types(List[bytes])
 @beam.typehints.with_output_types(pa.RecordBatch)
 class _DecodeBatchExamplesDoFn(beam.DoFn):
-  """Batches serialized protos bytes and decode them into an Arrow table."""
-
-  def __init__(self, schema: Optional[schema_pb2.Schema],
-               raw_record_column_name: Optional[Text]):
-    """Initializer."""
-    self._serialized_schema = None
-    if schema is not None:
-      # Serialize to avoid storing TFMD protos. See b/167128119 for the reason.
-      self._serialized_schema = schema.SerializeToString()
-    self._raw_record_column_name = raw_record_column_name
-    self._decoder = None
-
-  def setup(self):
-    if self._serialized_schema:
-      self._decoder = (
-          sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
-              _SEQUENCE_COLUMN_NAME,
-              self._serialized_schema))
-    else:
-      self._decoder = (
-          sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
-              _SEQUENCE_COLUMN_NAME))
-
-  def process(self, examples: List[bytes]):
-    assert self._decoder is not None  # Workflow ran setup().
-    decoded = self._decoder.DecodeBatch(examples)
-    if self._raw_record_column_name is None:
-      yield decoded
-    else:
-      yield record_based_tfxio.AppendRawRecordColumn(
-          decoded, self._raw_record_column_name, examples)
+    """Batches serialized protos bytes and decode them into an Arrow table."""
+
+    def __init__(
+        self, schema: Optional[schema_pb2.Schema], raw_record_column_name: Optional[str]
+    ):
+        """Initializer."""
+        self._serialized_schema = None
+        if schema is not None:
+            # Serialize to avoid storing TFMD protos. See b/167128119 for the reason.
+            self._serialized_schema = schema.SerializeToString()
+        self._raw_record_column_name = raw_record_column_name
+        self._decoder = None
+
+    def setup(self):
+        if self._serialized_schema:
+            self._decoder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
+                _SEQUENCE_COLUMN_NAME, self._serialized_schema
+            )
+        else:
+            self._decoder = sequence_example_coder.SequenceExamplesToRecordBatchDecoder(
+                _SEQUENCE_COLUMN_NAME
+            )
+
+    def process(self, examples: List[bytes]):
+        assert self._decoder is not None  # Workflow ran setup().
+        decoded = self._decoder.DecodeBatch(examples)
+        if self._raw_record_column_name is None:
+            yield decoded
+        else:
+            yield record_based_tfxio.AppendRawRecordColumn(
+                decoded, self._raw_record_column_name, examples
+            )
diff --git a/tfx_bsl/tfxio/tf_sequence_example_record_test.py b/tfx_bsl/tfxio/tf_sequence_example_record_test.py
index a0a02243..611ec30f 100644
--- a/tfx_bsl/tfxio/tf_sequence_example_record_test.py
+++ b/tfx_bsl/tfxio/tf_sequence_example_record_test.py
@@ -14,28 +14,30 @@
 """Tests for tfx_bsl.tfxio.tf_example_record."""
 
 import os
-import pytest
 
-from absl import flags
 import apache_beam as beam
-from apache_beam.testing import util as beam_testing_util
 import numpy as np
 import pyarrow as pa
+import pytest
 import tensorflow as tf
-from tfx_bsl.arrow import path
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import telemetry_test_util
-from tfx_bsl.tfxio import test_case
-from tfx_bsl.tfxio import tf_sequence_example_record
-
+from absl import flags
+from apache_beam.testing import util as beam_testing_util
 from google.protobuf import text_format
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.arrow import path
+from tfx_bsl.tfxio import (
+    dataset_options,
+    telemetry_test_util,
+    test_case,
+    tf_sequence_example_record,
+)
 
 FLAGS = flags.FLAGS
 
 _SEQUENCE_COLUMN_NAME = "##SEQUENCE##"
-_SCHEMA = text_format.Parse("""
+_SCHEMA = text_format.Parse(
+    """
   feature {
     name: "int_feature"
     type: INT
@@ -97,7 +99,9 @@
       }
     }
   }
-""".replace("$SEQ", _SEQUENCE_COLUMN_NAME), schema_pb2.Schema())
+""".replace("$SEQ", _SEQUENCE_COLUMN_NAME),
+    schema_pb2.Schema(),
+)
 
 _TELEMETRY_DESCRIPTORS = ["Some", "Component"]
 
@@ -168,295 +172,326 @@
 
 
 _EXPECTED_COLUMN_VALUES = {
-    path.ColumnPath(["int_feature"]):
-        pa.array([[1], [2], [3]], type=pa.large_list(pa.int64())),
-    path.ColumnPath(["float_feature"]):
-        pa.array([[1, 2, 3, 4], [2, 3, 4, 5], None],
-                 type=pa.large_list(pa.float32())),
-    path.ColumnPath([_SEQUENCE_COLUMN_NAME, "int_feature"]):
-        pa.array([[[1, 2], [3]], None, [[4]]],
-                 pa.large_list(pa.large_list(pa.int64()))),
-    path.ColumnPath([_SEQUENCE_COLUMN_NAME, "string_feature"]):
-        pa.array([None, [[b"foo", b"bar"], []], [[b"baz"]]],
-                 pa.large_list(pa.large_list(pa.large_binary())))
+    path.ColumnPath(["int_feature"]): pa.array(
+        [[1], [2], [3]], type=pa.large_list(pa.int64())
+    ),
+    path.ColumnPath(["float_feature"]): pa.array(
+        [[1, 2, 3, 4], [2, 3, 4, 5], None], type=pa.large_list(pa.float32())
+    ),
+    path.ColumnPath([_SEQUENCE_COLUMN_NAME, "int_feature"]): pa.array(
+        [[[1, 2], [3]], None, [[4]]], pa.large_list(pa.large_list(pa.int64()))
+    ),
+    path.ColumnPath([_SEQUENCE_COLUMN_NAME, "string_feature"]): pa.array(
+        [None, [[b"foo", b"bar"], []], [[b"baz"]]],
+        pa.large_list(pa.large_list(pa.large_binary())),
+    ),
 }
 
 
 def _CreateExamplesAsTensors():
-  if tf.executing_eagerly():
-    sparse_tensor_factory = tf.SparseTensor
-  else:
-    sparse_tensor_factory = tf.compat.v1.SparseTensorValue
-  ragged_tensor_factory = tf.ragged.constant
-
-  return [{
-      "int_feature":
-          sparse_tensor_factory(
-              values=[1], indices=[[0, 0]], dense_shape=[1, 1]),
-      "float_feature":
-          sparse_tensor_factory(
-              values=[1.0, 2.0, 3.0, 4.0],
-              indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
-              dense_shape=[1, 4]),
-      "seq_string_feature":
-          ragged_tensor_factory([[]], dtype=tf.string, ragged_rank=2),
-      "seq_int_feature":
-          ragged_tensor_factory([[[1, 2], [3]]], dtype=tf.int64, ragged_rank=2),
-  }, {
-      "int_feature":
-          sparse_tensor_factory(
-              values=[2], indices=[[0, 0]], dense_shape=[1, 1]),
-      "float_feature":
-          sparse_tensor_factory(
-              values=[2.0, 3.0, 4.0, 5.0],
-              indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
-              dense_shape=[1, 4]),
-      "seq_string_feature":
-          ragged_tensor_factory([[[b"foo", b"bar"], []]],
-                                dtype=tf.string,
-                                ragged_rank=2),
-      "seq_int_feature":
-          ragged_tensor_factory([[]], dtype=tf.int64, ragged_rank=2),
-  }, {
-      "int_feature":
-          sparse_tensor_factory(
-              values=[3], indices=[[0, 0]], dense_shape=[1, 1]),
-      "float_feature":
-          sparse_tensor_factory(
-              values=[], indices=np.empty((0, 2)), dense_shape=[1, 0]),
-      "seq_string_feature":
-          ragged_tensor_factory([[[b"baz"]]], dtype=tf.string, ragged_rank=2),
-      "seq_int_feature":
-          ragged_tensor_factory([[[4]]], dtype=tf.int64, ragged_rank=2),
-  }]
+    if tf.executing_eagerly():
+        sparse_tensor_factory = tf.SparseTensor
+    else:
+        sparse_tensor_factory = tf.compat.v1.SparseTensorValue
+    ragged_tensor_factory = tf.ragged.constant
+
+    return [
+        {
+            "int_feature": sparse_tensor_factory(
+                values=[1], indices=[[0, 0]], dense_shape=[1, 1]
+            ),
+            "float_feature": sparse_tensor_factory(
+                values=[1.0, 2.0, 3.0, 4.0],
+                indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
+                dense_shape=[1, 4],
+            ),
+            "seq_string_feature": ragged_tensor_factory(
+                [[]], dtype=tf.string, ragged_rank=2
+            ),
+            "seq_int_feature": ragged_tensor_factory(
+                [[[1, 2], [3]]], dtype=tf.int64, ragged_rank=2
+            ),
+        },
+        {
+            "int_feature": sparse_tensor_factory(
+                values=[2], indices=[[0, 0]], dense_shape=[1, 1]
+            ),
+            "float_feature": sparse_tensor_factory(
+                values=[2.0, 3.0, 4.0, 5.0],
+                indices=[[0, 0], [0, 1], [0, 2], [0, 3]],
+                dense_shape=[1, 4],
+            ),
+            "seq_string_feature": ragged_tensor_factory(
+                [[[b"foo", b"bar"], []]], dtype=tf.string, ragged_rank=2
+            ),
+            "seq_int_feature": ragged_tensor_factory(
+                [[]], dtype=tf.int64, ragged_rank=2
+            ),
+        },
+        {
+            "int_feature": sparse_tensor_factory(
+                values=[3], indices=[[0, 0]], dense_shape=[1, 1]
+            ),
+            "float_feature": sparse_tensor_factory(
+                values=[], indices=np.empty((0, 2)), dense_shape=[1, 0]
+            ),
+            "seq_string_feature": ragged_tensor_factory(
+                [[[b"baz"]]], dtype=tf.string, ragged_rank=2
+            ),
+            "seq_int_feature": ragged_tensor_factory(
+                [[[4]]], dtype=tf.int64, ragged_rank=2
+            ),
+        },
+    ]
 
 
 _EXPECTED_TENSORS = _CreateExamplesAsTensors()
 
 
 def _WriteInputs(filename):
-  with tf.io.TFRecordWriter(filename, "GZIP") as w:
-    for s in _SERIALIZED_EXAMPLES:
-      w.write(s)
+    with tf.io.TFRecordWriter(filename, "GZIP") as w:
+        for s in _SERIALIZED_EXAMPLES:
+            w.write(s)
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class TfSequenceExampleRecordTest(test_case.TfxBslTestCase):
-
-  @classmethod
-  def setUpClass(cls):
-    super().setUpClass()
-    cls._example_file = os.path.join(
-        FLAGS.test_tmpdir, "tfsequenceexamplerecordtest", "input.recordio.gz")
-    tf.io.gfile.makedirs(os.path.dirname(cls._example_file))
-    _WriteInputs(cls._example_file)
-
-  def _MakeTFXIO(self, schema, raw_record_column_name=None):
-    return tf_sequence_example_record.TFSequenceExampleRecord(
-        self._example_file, schema=schema,
-        raw_record_column_name=raw_record_column_name,
-        telemetry_descriptors=_TELEMETRY_DESCRIPTORS)
-
-  def _ValidateRecordBatch(
-      self, record_batch, raw_record_column_name=None):
-    self.assertIsInstance(record_batch, pa.RecordBatch)
-    self.assertEqual(record_batch.num_rows, 3)
-    for i, field in enumerate(record_batch.schema):
-      if field.name == raw_record_column_name:
-        continue
-      if field.name == _SEQUENCE_COLUMN_NAME:
-        self.assertTrue(pa.types.is_struct(field.type))
-        for seq_column, seq_field in zip(
-            record_batch.column(i).flatten(), list(field.type)):
-          expected_array = _EXPECTED_COLUMN_VALUES[path.ColumnPath(
-              [_SEQUENCE_COLUMN_NAME, seq_field.name])]
-          self.assertTrue(
-              seq_column.equals(expected_array),
-              "Sequence column {} did not match ({} vs {})".format(
-                  seq_field.name, seq_column, expected_array))
-        continue
-      self.assertTrue(
-          record_batch.column(i).equals(_EXPECTED_COLUMN_VALUES[path.ColumnPath(
-              [field.name])]), "Column {} did not match ({} vs {}).".format(
-                  field.name, record_batch.column(i),
-                  _EXPECTED_COLUMN_VALUES[path.ColumnPath([field.name])]))
-
-    if raw_record_column_name is not None:
-      self.assertEqual(record_batch.schema.names[-1], raw_record_column_name)
-      self.assertTrue(record_batch.columns[-1].type.equals(
-          pa.large_list(pa.large_binary())))
-      self.assertEqual(record_batch.columns[-1].flatten().to_pylist(),
-                       _SERIALIZED_EXAMPLES)
-
-  @test_case.named_parameters(*[
-      dict(testcase_name="attach_raw_records", attach_raw_records=True),
-      dict(testcase_name="noattach_raw_records", attach_raw_records=False),
-  ])
-  def testE2E(self, attach_raw_records):
-    raw_column_name = "raw_records" if attach_raw_records else None
-    tfxio = self._MakeTFXIO(_SCHEMA, raw_column_name)
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, raw_column_name)
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 4)
-      self.assertIn("int_feature", dict_of_tensors)
-      self.assertIn("float_feature", dict_of_tensors)
-      self.assertIn("seq_string_feature", dict_of_tensors)
-      self.assertIn("seq_int_feature", dict_of_tensors)
-
-    p = beam.Pipeline()
-    record_batch_pcoll = p | tfxio.BeamSource(batch_size=1000)
-    beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-    pipeline_result = p.run()
-    pipeline_result.wait_until_finish()
-    telemetry_test_util.ValidateMetrics(
-        self, pipeline_result, _TELEMETRY_DESCRIPTORS,
-        "tf_sequence_example", "tfrecords_gzip")
-
-  @test_case.named_parameters(*[
-      dict(testcase_name="attach_raw_records", attach_raw_records=True),
-      dict(testcase_name="noattach_raw_records", attach_raw_records=False),
-  ])
-  def testProjection(self, attach_raw_records):
-    raw_column_name = "raw_records" if attach_raw_records else None
-    tfxio = self._MakeTFXIO(_SCHEMA, raw_column_name).Project(
-        ["int_feature", "seq_string_feature"])
-    self.assertEqual(set(["int_feature", "seq_string_feature"]),
-                     set(tfxio.TensorRepresentations()))
-
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch, raw_column_name)
-      expected_schema = tfxio.ArrowSchema()
-      self.assertTrue(
-          record_batch.schema.equals(expected_schema),
-          "actual: {}; expected: {}".format(
-              record_batch.schema, expected_schema))
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 2)
-      self.assertIn("int_feature", dict_of_tensors)
-      self.assertIn("seq_string_feature", dict_of_tensors)
-
-    with beam.Pipeline() as p:
-      # Setting the betch_size to make sure only one batch is generated.
-      record_batch_pcoll = p | tfxio.BeamSource(
-          batch_size=len(_EXAMPLES))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testProjectionNoSequenceFeature(self):
-    tfxio = self._MakeTFXIO(_SCHEMA).Project(["int_feature"])
-    arrow_schema = tfxio.ArrowSchema()
-    self.assertLen(arrow_schema, 1)
-    self.assertIn("int_feature", arrow_schema.names)
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self._ValidateRecordBatch(record_batch)
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 1)
-      self.assertIn("int_feature", dict_of_tensors)
-
-    with beam.Pipeline() as p:
-      # Setting the betch_size to make sure only one batch is generated.
-      record_batch_pcoll = p | tfxio.BeamSource(
-          batch_size=len(_EXAMPLES))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testProjectEmpty(self):
-    tfxio = self._MakeTFXIO(_SCHEMA).Project([])
-    self.assertEmpty(tfxio.ArrowSchema())
-    def _AssertFn(record_batch_list):
-      self.assertLen(record_batch_list, 1)
-      record_batch = record_batch_list[0]
-      self.assertEqual(record_batch.num_columns, 0)
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertEmpty(dict_of_tensors)
-    with beam.Pipeline() as p:
-      # Setting the betch_size to make sure only one batch is generated.
-      record_batch_pcoll = p | tfxio.BeamSource(
-          batch_size=len(_EXAMPLES))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
-
-  def testTensorAdapter(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    tensor_adapter = tfxio.TensorAdapter()
-
-    def _AssertFn(tensor_dicts_list):
-      # Sort the produced list of batched tensor dicts to avoid errors due to
-      # order of elements in the produced PCollection.
-      tensor_dicts_list = sorted(
-          tensor_dicts_list, key=lambda x: x["int_feature"].values[0])
-
-      for i, tensors_dict in enumerate(tensor_dicts_list):
-        self.assertLen(tensors_dict, 4)
-        for name, tensor in tensors_dict.items():
-          self.assertTensorsEqual(
-              tensor,
-              _EXPECTED_TENSORS[i][name],
-              msg=f"For tensor {name} at index {i}")
-
-    with beam.Pipeline() as p:
-      # Expected tensors are batched with batch_size = 1.
-      record_batches = p | tfxio.BeamSource(batch_size=1)
-      tensors = record_batches | beam.Map(tensor_adapter.ToBatchTensors)
-      beam_testing_util.assert_that(tensors, _AssertFn)
-
-  def testTensorFlowDataset(self):
-    tfxio = self._MakeTFXIO(_SCHEMA)
-    # Expected tensors are batched with batch_size = 1.
-    options = dataset_options.TensorFlowDatasetOptions(
-        batch_size=1, shuffle=False, num_epochs=1)
-    for i, parsed_examples_dict in enumerate(
-        tfxio.TensorFlowDataset(options=options)):
-      self.assertLen(parsed_examples_dict, 4)
-      for name, tensor in parsed_examples_dict.items():
-        self.assertTensorsEqual(
-            tensor,
-            _EXPECTED_TENSORS[i][name],
-            msg=f"For tensor {name} at index {i}")
+    @classmethod
+    def setUpClass(cls):
+        super().setUpClass()
+        cls._example_file = os.path.join(
+            FLAGS.test_tmpdir, "tfsequenceexamplerecordtest", "input.recordio.gz"
+        )
+        tf.io.gfile.makedirs(os.path.dirname(cls._example_file))
+        _WriteInputs(cls._example_file)
+
+    def _MakeTFXIO(self, schema, raw_record_column_name=None):
+        return tf_sequence_example_record.TFSequenceExampleRecord(
+            self._example_file,
+            schema=schema,
+            raw_record_column_name=raw_record_column_name,
+            telemetry_descriptors=_TELEMETRY_DESCRIPTORS,
+        )
+
+    def _ValidateRecordBatch(self, record_batch, raw_record_column_name=None):
+        self.assertIsInstance(record_batch, pa.RecordBatch)
+        self.assertEqual(record_batch.num_rows, 3)
+        for i, field in enumerate(record_batch.schema):
+            if field.name == raw_record_column_name:
+                continue
+            if field.name == _SEQUENCE_COLUMN_NAME:
+                self.assertTrue(pa.types.is_struct(field.type))
+                for seq_column, seq_field in zip(
+                    record_batch.column(i).flatten(), list(field.type)
+                ):
+                    expected_array = _EXPECTED_COLUMN_VALUES[
+                        path.ColumnPath([_SEQUENCE_COLUMN_NAME, seq_field.name])
+                    ]
+                    self.assertTrue(
+                        seq_column.equals(expected_array),
+                        f"Sequence column {seq_field.name} did not match ({seq_column} vs {expected_array})",
+                    )
+                continue
+            self.assertTrue(
+                record_batch.column(i).equals(
+                    _EXPECTED_COLUMN_VALUES[path.ColumnPath([field.name])]
+                ),
+                f"Column {field.name} did not match ({record_batch.column(i)} vs {_EXPECTED_COLUMN_VALUES[path.ColumnPath([field.name])]}).",
+            )
+
+        if raw_record_column_name is not None:
+            self.assertEqual(record_batch.schema.names[-1], raw_record_column_name)
+            self.assertTrue(
+                record_batch.columns[-1].type.equals(pa.large_list(pa.large_binary()))
+            )
+            self.assertEqual(
+                record_batch.columns[-1].flatten().to_pylist(), _SERIALIZED_EXAMPLES
+            )
+
+    @test_case.named_parameters(
+        *[
+            dict(testcase_name="attach_raw_records", attach_raw_records=True),
+            dict(testcase_name="noattach_raw_records", attach_raw_records=False),
+        ]
+    )
+    def testE2E(self, attach_raw_records):
+        raw_column_name = "raw_records" if attach_raw_records else None
+        tfxio = self._MakeTFXIO(_SCHEMA, raw_column_name)
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, raw_column_name)
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 4)
+            self.assertIn("int_feature", dict_of_tensors)
+            self.assertIn("float_feature", dict_of_tensors)
+            self.assertIn("seq_string_feature", dict_of_tensors)
+            self.assertIn("seq_int_feature", dict_of_tensors)
+
+        p = beam.Pipeline()
+        record_batch_pcoll = p | tfxio.BeamSource(batch_size=1000)
+        beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+        pipeline_result = p.run()
+        pipeline_result.wait_until_finish()
+        telemetry_test_util.ValidateMetrics(
+            self,
+            pipeline_result,
+            _TELEMETRY_DESCRIPTORS,
+            "tf_sequence_example",
+            "tfrecords_gzip",
+        )
+
+    @test_case.named_parameters(
+        *[
+            dict(testcase_name="attach_raw_records", attach_raw_records=True),
+            dict(testcase_name="noattach_raw_records", attach_raw_records=False),
+        ]
+    )
+    def testProjection(self, attach_raw_records):
+        raw_column_name = "raw_records" if attach_raw_records else None
+        tfxio = self._MakeTFXIO(_SCHEMA, raw_column_name).Project(
+            ["int_feature", "seq_string_feature"]
+        )
+        self.assertEqual(
+            set(["int_feature", "seq_string_feature"]),
+            set(tfxio.TensorRepresentations()),
+        )
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch, raw_column_name)
+            expected_schema = tfxio.ArrowSchema()
+            self.assertTrue(
+                record_batch.schema.equals(expected_schema),
+                f"actual: {record_batch.schema}; expected: {expected_schema}",
+            )
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 2)
+            self.assertIn("int_feature", dict_of_tensors)
+            self.assertIn("seq_string_feature", dict_of_tensors)
+
+        with beam.Pipeline() as p:
+            # Setting the betch_size to make sure only one batch is generated.
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_EXAMPLES))
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testProjectionNoSequenceFeature(self):
+        tfxio = self._MakeTFXIO(_SCHEMA).Project(["int_feature"])
+        arrow_schema = tfxio.ArrowSchema()
+        self.assertLen(arrow_schema, 1)
+        self.assertIn("int_feature", arrow_schema.names)
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self._ValidateRecordBatch(record_batch)
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 1)
+            self.assertIn("int_feature", dict_of_tensors)
+
+        with beam.Pipeline() as p:
+            # Setting the betch_size to make sure only one batch is generated.
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_EXAMPLES))
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testProjectEmpty(self):
+        tfxio = self._MakeTFXIO(_SCHEMA).Project([])
+        self.assertEmpty(tfxio.ArrowSchema())
+
+        def _AssertFn(record_batch_list):
+            self.assertLen(record_batch_list, 1)
+            record_batch = record_batch_list[0]
+            self.assertEqual(record_batch.num_columns, 0)
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertEmpty(dict_of_tensors)
+
+        with beam.Pipeline() as p:
+            # Setting the betch_size to make sure only one batch is generated.
+            record_batch_pcoll = p | tfxio.BeamSource(batch_size=len(_EXAMPLES))
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+
+    def testTensorAdapter(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        tensor_adapter = tfxio.TensorAdapter()
+
+        def _AssertFn(tensor_dicts_list):
+            # Sort the produced list of batched tensor dicts to avoid errors due to
+            # order of elements in the produced PCollection.
+            tensor_dicts_list = sorted(
+                tensor_dicts_list, key=lambda x: x["int_feature"].values[0]
+            )
+
+            for i, tensors_dict in enumerate(tensor_dicts_list):
+                self.assertLen(tensors_dict, 4)
+                for name, tensor in tensors_dict.items():
+                    self.assertTensorsEqual(
+                        tensor,
+                        _EXPECTED_TENSORS[i][name],
+                        msg=f"For tensor {name} at index {i}",
+                    )
+
+        with beam.Pipeline() as p:
+            # Expected tensors are batched with batch_size = 1.
+            record_batches = p | tfxio.BeamSource(batch_size=1)
+            tensors = record_batches | beam.Map(tensor_adapter.ToBatchTensors)
+            beam_testing_util.assert_that(tensors, _AssertFn)
+
+    def testTensorFlowDataset(self):
+        tfxio = self._MakeTFXIO(_SCHEMA)
+        # Expected tensors are batched with batch_size = 1.
+        options = dataset_options.TensorFlowDatasetOptions(
+            batch_size=1, shuffle=False, num_epochs=1
+        )
+        for i, parsed_examples_dict in enumerate(
+            tfxio.TensorFlowDataset(options=options)
+        ):
+            self.assertLen(parsed_examples_dict, 4)
+            for name, tensor in parsed_examples_dict.items():
+                self.assertTensorsEqual(
+                    tensor,
+                    _EXPECTED_TENSORS[i][name],
+                    msg=f"For tensor {name} at index {i}",
+                )
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class TFSequenceExampleBeamRecordTest(test_case.TfxBslTestCase):
-
-  def testE2E(self):
-    raw_record_column_name = "raw_record"
-    tfxio = tf_sequence_example_record.TFSequenceExampleBeamRecord(
-        physical_format="inmem",
-        telemetry_descriptors=["some", "component"],
-        schema=_SCHEMA,
-        raw_record_column_name=raw_record_column_name,
-    )
-
-    def _AssertFn(record_batches):
-      self.assertLen(record_batches, 1)
-      record_batch = record_batches[0]
-      self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
-      tensor_adapter = tfxio.TensorAdapter()
-      dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
-      self.assertLen(dict_of_tensors, 4)
-      self.assertIn("int_feature", dict_of_tensors)
-      self.assertIn("float_feature", dict_of_tensors)
-      self.assertIn("seq_string_feature", dict_of_tensors)
-      self.assertIn("seq_int_feature", dict_of_tensors)
-
-    with beam.Pipeline() as p:
-      record_batch_pcoll = (
-          p
-          | "CreateInMemRecords" >> beam.Create(_SERIALIZED_EXAMPLES)
-          | "BeamSource" >>
-          tfxio.BeamSource(batch_size=len(_SERIALIZED_EXAMPLES)))
-      beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
+    def testE2E(self):
+        raw_record_column_name = "raw_record"
+        tfxio = tf_sequence_example_record.TFSequenceExampleBeamRecord(
+            physical_format="inmem",
+            telemetry_descriptors=["some", "component"],
+            schema=_SCHEMA,
+            raw_record_column_name=raw_record_column_name,
+        )
+
+        def _AssertFn(record_batches):
+            self.assertLen(record_batches, 1)
+            record_batch = record_batches[0]
+            self.assertTrue(record_batch.schema.equals(tfxio.ArrowSchema()))
+            tensor_adapter = tfxio.TensorAdapter()
+            dict_of_tensors = tensor_adapter.ToBatchTensors(record_batch)
+            self.assertLen(dict_of_tensors, 4)
+            self.assertIn("int_feature", dict_of_tensors)
+            self.assertIn("float_feature", dict_of_tensors)
+            self.assertIn("seq_string_feature", dict_of_tensors)
+            self.assertIn("seq_int_feature", dict_of_tensors)
+
+        with beam.Pipeline() as p:
+            record_batch_pcoll = (
+                p
+                | "CreateInMemRecords" >> beam.Create(_SERIALIZED_EXAMPLES)
+                | "BeamSource" >> tfxio.BeamSource(batch_size=len(_SERIALIZED_EXAMPLES))
+            )
+            beam_testing_util.assert_that(record_batch_pcoll, _AssertFn)
 
 
 if __name__ == "__main__":
-  test_case.main()
+    test_case.main()
diff --git a/tfx_bsl/tfxio/tfxio.py b/tfx_bsl/tfxio/tfxio.py
index 0749c14e..4107fbee 100644
--- a/tfx_bsl/tfxio/tfxio.py
+++ b/tfx_bsl/tfxio/tfxio.py
@@ -25,185 +25,196 @@
 
 import abc
 from typing import Iterator, List, Optional, Text
+
 import apache_beam as beam
 import pyarrow as pa
 import tensorflow as tf
-from tfx_bsl.tfxio import dataset_options
-from tfx_bsl.tfxio import tensor_adapter
 
+from tfx_bsl.tfxio import dataset_options, tensor_adapter
+
+
+class TFXIO(metaclass=abc.ABCMeta):
+    """Abstract basic class of all TFXIO API implementations."""
 
-class TFXIO(object, metaclass=abc.ABCMeta):
-  """Abstract basic class of all TFXIO API implementations."""
+    @abc.abstractmethod
+    def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
+        """Returns a beam `PTransform` that produces `PCollection[pa.RecordBatch]`.
 
-  @abc.abstractmethod
-  def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
-    """Returns a beam `PTransform` that produces `PCollection[pa.RecordBatch]`.
-
-    May NOT raise an error if the TFMD schema was not provided at construction
-    time.
-
-    If a TFMD schema was provided at construction time, all the
-    `pa.RecordBatch`es in the result `PCollection` must be of the same schema
-    returned by `self.ArrowSchema`. If a TFMD schema was not provided, the
-    `pa.RecordBatch`es might not be of the same schema (they may contain
-    different numbers of columns).
-
-    Args:
-      batch_size: if not None, the `pa.RecordBatch` produced will be of the
-        specified size. Otherwise it's automatically tuned by Beam.
-    """
-
-  @abc.abstractmethod
-  def RecordBatches(
-      self, options: dataset_options.RecordBatchesOptions
-  ) -> Iterator[pa.RecordBatch]:
-    """Returns an iterable of record batches.
-
-    This can be used outside of Apache Beam or TensorFlow to access data.
-
-    Args:
-      options: An options object for iterating over record batches. Look at
-        `dataset_options.RecordBatchesOptions` for more details.
-    """
-
-  @abc.abstractmethod
-  def ArrowSchema(self) -> pa.Schema:
-    """Returns the schema of the `RecordBatch` produced by `self.BeamSource()`.
-
-    May raise an error if the TFMD schema was not provided at construction time.
-    """
-
-  @abc.abstractmethod
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    """Returns the `TensorRepresentations`.
-
-    These `TensorRepresentation`s describe the tensors or composite tensors
-    produced by the `TensorAdapter` created from `self.TensorAdapter()` or
-    the tf.data.Dataset created from `self.TensorFlowDataset()`.
-
-    May raise an error if the TFMD schema was not provided at construction time.
-    May raise an error if the tensor representations are invalid.
-    """
-
-  @abc.abstractmethod
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    """Returns a tf.data.Dataset of TF inputs.
-
-    May raise an error if the TFMD schema was not provided at construction time.
-
-    Args:
-      options: an options object for the tf.data.Dataset. Look at
-        `dataset_options.TensorFlowDatasetOptions` for more details.
-    """
-
-  @abc.abstractmethod
-  def _ProjectImpl(self, tensor_names: List[Text]) -> "TFXIO":
-    """Sub-classes should implement this interface to perform projections.
-
-    It should return a `TFXIO` instance that is the same as `self` except that:
-      - Only columns needed for given tensor_names are guaranteed to be
-        produced by `self.BeamSource()`
-      - `self.TensorAdapterConfig()` and `self.TensorFlowDataset()` are trimmed
-        to contain only those tensors.
-
-    May raise an error if the TFMD schema was not provided at construction time.
-
-    Args:
-      tensor_names: a set of tensor names.
-    """
-
-  # final
-  def Project(self, tensor_names: List[Text]) -> "TFXIO":
-    """Projects the dataset represented by this TFXIO.
-
-    A Projected TFXIO:
-    - Only columns needed for given tensor_names are guaranteed to be
-      produced by `self.BeamSource()`
-    - `self.TensorAdapterConfig()` and `self.TensorFlowDataset()` are trimmed
-      to contain only those tensors.
-    - It retains a reference to the very original TFXIO, so its TensorAdapter
-      knows about the specs of the tensors that would be produced by the
-      original TensorAdapter. Also see `TensorAdapter.OriginalTensorSpec()`.
-
-    May raise an error if the TFMD schema was not provided at construction time.
-
-    Args:
-      tensor_names: a set of tensor names.
-
-    Returns:
-      A `TFXIO` instance that is the same as `self` except that:
-      - Only columns needed for given tensor_names are guaranteed to be
-        produced by `self.BeamSource()`
-      - `self.TensorAdapterConfig()` and `self.TensorFlowDataset()` are trimmed
-        to contain only those tensors.
-    """
-    if isinstance(self, _ProjectedTFXIO):
-      # pylint: disable=protected-access
-      return _ProjectedTFXIO(self.origin,
-                             self.projected._ProjectImpl(tensor_names))
-    return _ProjectedTFXIO(self, self._ProjectImpl(tensor_names))
-
-  # final
-  def TensorAdapterConfig(self) -> tensor_adapter.TensorAdapterConfig:
-    """Returns the config to initialize a `TensorAdapter`.
-
-    Returns:
-      a `TensorAdapterConfig` that is the same as what is used to initialize the
-      `TensorAdapter` returned by `self.TensorAdapter()`.
-    """
-    return tensor_adapter.TensorAdapterConfig(
-        self.ArrowSchema(), self.TensorRepresentations())
-
-  # final
-  def TensorAdapter(self) -> tensor_adapter.TensorAdapter:
-    """Returns a TensorAdapter that converts pa.RecordBatch to TF inputs.
-
-    May raise an error if the TFMD schema was not provided at construction time.
-    """
-    return tensor_adapter.TensorAdapter(self.TensorAdapterConfig())
+        May NOT raise an error if the TFMD schema was not provided at construction
+        time.
+
+        If a TFMD schema was provided at construction time, all the
+        `pa.RecordBatch`es in the result `PCollection` must be of the same schema
+        returned by `self.ArrowSchema`. If a TFMD schema was not provided, the
+        `pa.RecordBatch`es might not be of the same schema (they may contain
+        different numbers of columns).
+
+        Args:
+        ----
+          batch_size: if not None, the `pa.RecordBatch` produced will be of the
+            specified size. Otherwise it's automatically tuned by Beam.
+        """
+
+    @abc.abstractmethod
+    def RecordBatches(
+        self, options: dataset_options.RecordBatchesOptions
+    ) -> Iterator[pa.RecordBatch]:
+        """Returns an iterable of record batches.
+
+        This can be used outside of Apache Beam or TensorFlow to access data.
+
+        Args:
+        ----
+          options: An options object for iterating over record batches. Look at
+            `dataset_options.RecordBatchesOptions` for more details.
+        """
+
+    @abc.abstractmethod
+    def ArrowSchema(self) -> pa.Schema:
+        """Returns the schema of the `RecordBatch` produced by `self.BeamSource()`.
+
+        May raise an error if the TFMD schema was not provided at construction time.
+        """
+
+    @abc.abstractmethod
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        """Returns the `TensorRepresentations`.
+
+        These `TensorRepresentation`s describe the tensors or composite tensors
+        produced by the `TensorAdapter` created from `self.TensorAdapter()` or
+        the tf.data.Dataset created from `self.TensorFlowDataset()`.
+
+        May raise an error if the TFMD schema was not provided at construction time.
+        May raise an error if the tensor representations are invalid.
+        """
+
+    @abc.abstractmethod
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        """Returns a tf.data.Dataset of TF inputs.
+
+        May raise an error if the TFMD schema was not provided at construction time.
+
+        Args:
+        ----
+          options: an options object for the tf.data.Dataset. Look at
+            `dataset_options.TensorFlowDatasetOptions` for more details.
+        """
+
+    @abc.abstractmethod
+    def _ProjectImpl(self, tensor_names: List[str]) -> "TFXIO":
+        """Sub-classes should implement this interface to perform projections.
+
+        It should return a `TFXIO` instance that is the same as `self` except that:
+          - Only columns needed for given tensor_names are guaranteed to be
+            produced by `self.BeamSource()`
+          - `self.TensorAdapterConfig()` and `self.TensorFlowDataset()` are trimmed
+            to contain only those tensors.
+
+        May raise an error if the TFMD schema was not provided at construction time.
+
+        Args:
+        ----
+          tensor_names: a set of tensor names.
+        """
+
+    # final
+    def Project(self, tensor_names: List[str]) -> "TFXIO":
+        """Projects the dataset represented by this TFXIO.
+
+        A Projected TFXIO:
+        - Only columns needed for given tensor_names are guaranteed to be
+          produced by `self.BeamSource()`
+        - `self.TensorAdapterConfig()` and `self.TensorFlowDataset()` are trimmed
+          to contain only those tensors.
+        - It retains a reference to the very original TFXIO, so its TensorAdapter
+          knows about the specs of the tensors that would be produced by the
+          original TensorAdapter. Also see `TensorAdapter.OriginalTensorSpec()`.
+
+        May raise an error if the TFMD schema was not provided at construction time.
+
+        Args:
+        ----
+          tensor_names: a set of tensor names.
+
+        Returns:
+        -------
+          A `TFXIO` instance that is the same as `self` except that:
+          - Only columns needed for given tensor_names are guaranteed to be
+            produced by `self.BeamSource()`
+          - `self.TensorAdapterConfig()` and `self.TensorFlowDataset()` are trimmed
+            to contain only those tensors.
+        """
+        if isinstance(self, _ProjectedTFXIO):
+            # pylint: disable=protected-access
+            return _ProjectedTFXIO(
+                self.origin, self.projected._ProjectImpl(tensor_names)
+            )
+        return _ProjectedTFXIO(self, self._ProjectImpl(tensor_names))
+
+    # final
+    def TensorAdapterConfig(self) -> tensor_adapter.TensorAdapterConfig:
+        """Returns the config to initialize a `TensorAdapter`.
+
+        Returns
+        -------
+          a `TensorAdapterConfig` that is the same as what is used to initialize the
+          `TensorAdapter` returned by `self.TensorAdapter()`.
+        """
+        return tensor_adapter.TensorAdapterConfig(
+            self.ArrowSchema(), self.TensorRepresentations()
+        )
+
+    # final
+    def TensorAdapter(self) -> tensor_adapter.TensorAdapter:
+        """Returns a TensorAdapter that converts pa.RecordBatch to TF inputs.
+
+        May raise an error if the TFMD schema was not provided at construction time.
+        """
+        return tensor_adapter.TensorAdapter(self.TensorAdapterConfig())
 
 
 class _ProjectedTFXIO(TFXIO):
-  """A wrapper of a projected TFXIO to track its origin."""
+    """A wrapper of a projected TFXIO to track its origin."""
 
-  def __init__(self, origin: TFXIO, projected: TFXIO):
-    self._origin = origin
-    self._projected = projected
+    def __init__(self, origin: TFXIO, projected: TFXIO):
+        self._origin = origin
+        self._projected = projected
 
-  @property
-  def origin(self) -> TFXIO:
-    return self._origin
+    @property
+    def origin(self) -> TFXIO:
+        return self._origin
 
-  @property
-  def projected(self) -> TFXIO:
-    return self._projected
+    @property
+    def projected(self) -> TFXIO:
+        return self._projected
 
-  def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
-    return self.projected.BeamSource(batch_size)
+    def BeamSource(self, batch_size: Optional[int] = None) -> beam.PTransform:
+        return self.projected.BeamSource(batch_size)
 
-  def RecordBatches(
-      self, options: dataset_options.RecordBatchesOptions
-  ) -> Iterator[pa.RecordBatch]:
-    return self.projected.RecordBatches(options)
+    def RecordBatches(
+        self, options: dataset_options.RecordBatchesOptions
+    ) -> Iterator[pa.RecordBatch]:
+        return self.projected.RecordBatches(options)
 
-  def ArrowSchema(self) -> pa.Schema:
-    return self.projected.ArrowSchema()
+    def ArrowSchema(self) -> pa.Schema:
+        return self.projected.ArrowSchema()
 
-  def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
-    return self.projected.TensorRepresentations()
+    def TensorRepresentations(self) -> tensor_adapter.TensorRepresentations:
+        return self.projected.TensorRepresentations()
 
-  def TensorFlowDataset(
-      self,
-      options: dataset_options.TensorFlowDatasetOptions) -> tf.data.Dataset:
-    return self.projected.TensorFlowDataset(options)
+    def TensorFlowDataset(
+        self, options: dataset_options.TensorFlowDatasetOptions
+    ) -> tf.data.Dataset:
+        return self.projected.TensorFlowDataset(options)
 
-  def _ProjectImpl(self, unused_tensor_names: List[Text]) -> "TFXIO":
-    raise ValueError("This should never be called.")
+    def _ProjectImpl(self, unused_tensor_names: List[str]) -> "TFXIO":
+        raise ValueError("This should never be called.")
 
-  def TensorAdapterConfig(self) -> tensor_adapter.TensorAdapterConfig:
-    return tensor_adapter.TensorAdapterConfig(
-        self.projected.ArrowSchema(),
-        self.projected.TensorRepresentations(),
-        original_type_specs=self.origin.TensorAdapter().TypeSpecs())
+    def TensorAdapterConfig(self) -> tensor_adapter.TensorAdapterConfig:
+        return tensor_adapter.TensorAdapterConfig(
+            self.projected.ArrowSchema(),
+            self.projected.TensorRepresentations(),
+            original_type_specs=self.origin.TensorAdapter().TypeSpecs(),
+        )
diff --git a/tfx_bsl/tfxio/tfxio_test.py b/tfx_bsl/tfxio/tfxio_test.py
index 95690a30..3edb899d 100644
--- a/tfx_bsl/tfxio/tfxio_test.py
+++ b/tfx_bsl/tfxio/tfxio_test.py
@@ -14,29 +14,27 @@
 """Tests for tfx_bsl.tfxio.tfxio."""
 
 import pyarrow as pa
-from tfx_bsl.tfxio import tfxio
-
-from google.protobuf import text_format
 from absl.testing import absltest
+from google.protobuf import text_format
 from tensorflow_metadata.proto.v0 import schema_pb2
 
+from tfx_bsl.tfxio import tfxio
+
 
 class _FakeTFXIO(tfxio.TFXIO):
-  """A fake TFXIO for testing the projection origin tracking."""
+    """A fake TFXIO for testing the projection origin tracking."""
 
-  def __init__(self, columns):
-    super().__init__()
-    self._columns = columns
+    def __init__(self, columns):
+        super().__init__()
+        self._columns = columns
 
-  def ArrowSchema(self):
-    return pa.schema([
-        pa.field(c, pa.list_(pa.int32())) for c in self._columns
-    ])
+    def ArrowSchema(self):
+        return pa.schema([pa.field(c, pa.list_(pa.int32())) for c in self._columns])
 
-  def TensorRepresentations(self):
-    return {  # pylint: disable=g-complex-comprehension
-        c: text_format.Parse(
-            """
+    def TensorRepresentations(self):
+        return {  # pylint: disable=g-complex-comprehension
+            c: text_format.Parse(
+                """
               dense_tensor {
                 column_name: "%s"
                 shape {
@@ -44,49 +42,53 @@ def TensorRepresentations(self):
                     size: 1
                   }
                 }
-              }""" % c, schema_pb2.TensorRepresentation())
-        for c in self._columns
-    }
+              }"""
+                % c,
+                schema_pb2.TensorRepresentation(),
+            )
+            for c in self._columns
+        }
 
-  def _ProjectImpl(self, columns):
-    projected_columns = set(columns)
-    for c in projected_columns:
-      if c not in self._columns:
-        raise ValueError("Unexpected column")
-    return _FakeTFXIO(list(projected_columns))
+    def _ProjectImpl(self, columns):
+        projected_columns = set(columns)
+        for c in projected_columns:
+            if c not in self._columns:
+                raise ValueError("Unexpected column")
+        return _FakeTFXIO(list(projected_columns))
 
-  # not used in tests.
-  def BeamSource(self, batch_size):
-    raise NotImplementedError
+    # not used in tests.
+    def BeamSource(self, batch_size):
+        raise NotImplementedError
 
-  # not used in tests.
-  def RecordBatches(self, options):
-    raise NotImplementedError
+    # not used in tests.
+    def RecordBatches(self, options):
+        raise NotImplementedError
 
-  # not used in tests.
-  def TensorFlowDataset(self, options):
-    raise NotImplementedError
+    # not used in tests.
+    def TensorFlowDataset(self, options):
+        raise NotImplementedError
 
 
 class TfxioTest(absltest.TestCase):
-
-  def testProjection(self):
-    origin = _FakeTFXIO(["column1", "column2", "column3"])
-    origin_adapter = origin.TensorAdapter()
-    self.assertLen(origin_adapter.TypeSpecs(), 3)
-    projected = origin.Project(["column2", "column3"])
-    projected_adapter = projected.TensorAdapter()
-    self.assertLen(projected_adapter.TypeSpecs(), 2)
-    self.assertEqual(origin_adapter.TypeSpecs(),
-                     projected_adapter.OriginalTypeSpecs())
-
-    # Project again, but the origin should still be `origin`.
-    projected = projected.Project(["column3"])
-    projected_adapter = projected.TensorAdapter()
-    self.assertLen(projected_adapter.TypeSpecs(), 1)
-    self.assertEqual(origin_adapter.TypeSpecs(),
-                     projected_adapter.OriginalTypeSpecs())
+    def testProjection(self):
+        origin = _FakeTFXIO(["column1", "column2", "column3"])
+        origin_adapter = origin.TensorAdapter()
+        self.assertLen(origin_adapter.TypeSpecs(), 3)
+        projected = origin.Project(["column2", "column3"])
+        projected_adapter = projected.TensorAdapter()
+        self.assertLen(projected_adapter.TypeSpecs(), 2)
+        self.assertEqual(
+            origin_adapter.TypeSpecs(), projected_adapter.OriginalTypeSpecs()
+        )
+
+        # Project again, but the origin should still be `origin`.
+        projected = projected.Project(["column3"])
+        projected_adapter = projected.TensorAdapter()
+        self.assertLen(projected_adapter.TypeSpecs(), 1)
+        self.assertEqual(
+            origin_adapter.TypeSpecs(), projected_adapter.OriginalTypeSpecs()
+        )
 
 
 if __name__ == "__main__":
-  absltest.main()
+    absltest.main()
diff --git a/tfx_bsl/tools/build_docs.py b/tfx_bsl/tools/build_docs.py
index f4b5f245..95e77731 100644
--- a/tfx_bsl/tools/build_docs.py
+++ b/tfx_bsl/tools/build_docs.py
@@ -27,75 +27,84 @@
 python tfx_bsl/tools/build_docs.py --output_dir=/tmp/tfx_bsl_api
 ```
 """
-import inspect
 
-from absl import app
-from absl import flags
+import inspect
 
 import apache_beam as beam
+from absl import app, flags
+from tensorflow_docs.api_generator import doc_controls, generate_lib
 
-from tensorflow_docs.api_generator import doc_controls
-from tensorflow_docs.api_generator import generate_lib
 from tfx_bsl import public
+
 # pylint: disable=unused-import
 from tfx_bsl.public import beam as tfx_bsl_beam
 from tfx_bsl.public import proto as tfx_bsl_proto
 from tfx_bsl.public import tfxio as tfx_bsl_tfxio
+
 # pylint: enable=unused-import
 
-flags.DEFINE_string("output_dir",
-                    "/tmp/tfx_bsl_api", "Where to output the docs")
+flags.DEFINE_string("output_dir", "/tmp/tfx_bsl_api", "Where to output the docs")
 flags.DEFINE_string(
     "code_url_prefix",
     "https://github.com/tensorflow/tfx-bsl/blob/master/tfx_bsl/public",
-    "The url prefix for links to code.")
+    "The url prefix for links to code.",
+)
 
-flags.DEFINE_bool("search_hints", True,
-                  "Include metadata search hints in the generated files")
+flags.DEFINE_bool(
+    "search_hints", True, "Include metadata search hints in the generated files"
+)
 
-flags.DEFINE_string("site_path", "/tfx/tfx_bsl/api_docs/python",
-                    "Path prefix in the _toc.yaml")
+flags.DEFINE_string(
+    "site_path", "/tfx/tfx_bsl/api_docs/python", "Path prefix in the _toc.yaml"
+)
 
 FLAGS = flags.FLAGS
 
 
 def _filter_class_attributes(path, parent, children):
-  """Filter out class attirubtes that are part of the PTransform API."""
-  del path
-  skip_class_attributes = {
-      "expand", "label", "from_runner_api", "register_urn", "side_inputs"
-  }
-  if inspect.isclass(parent):
-    children = [(name, child)
-                for (name, child) in children
-                if name not in skip_class_attributes]
-  return children
+    """Filter out class attirubtes that are part of the PTransform API."""
+    del path
+    skip_class_attributes = {
+        "expand",
+        "label",
+        "from_runner_api",
+        "register_urn",
+        "side_inputs",
+    }
+    if inspect.isclass(parent):
+        children = [
+            (name, child)
+            for (name, child) in children
+            if name not in skip_class_attributes
+        ]
+    return children
 
 
 def main(args):
-  if args[1:]:
-    raise ValueError("Unrecognized Command line args", args[1:])
-
-  for name, value in inspect.getmembers(beam.PTransform):
-    # This ensures that the methods of PTransform are not documented in any
-    # derived classes.
-    if name == "__init__":
-      continue
-    try:
-      doc_controls.do_not_doc_inheritable(value)
-    except (TypeError, AttributeError):
-      pass
-
-  doc_generator = generate_lib.DocGenerator(
-      root_title="TensorFlow Extended Basic Shared Libraries",
-      py_modules=[("tfx_bsl.public", public)],
-      code_url_prefix=FLAGS.code_url_prefix,
-      search_hints=FLAGS.search_hints,
-      site_path=FLAGS.site_path,
-      callbacks=[_filter_class_attributes])
-
-  return doc_generator.build(output_dir=FLAGS.output_dir)
+    if args[1:]:
+        raise ValueError("Unrecognized Command line args", args[1:])
+
+    for name, value in inspect.getmembers(beam.PTransform):
+        # This ensures that the methods of PTransform are not documented in any
+        # derived classes.
+        if name == "__init__":
+            continue
+        try:
+            doc_controls.do_not_doc_inheritable(value)
+        except (TypeError, AttributeError):
+            pass
+
+    doc_generator = generate_lib.DocGenerator(
+        root_title="TensorFlow Extended Basic Shared Libraries",
+        py_modules=[("tfx_bsl.public", public)],
+        code_url_prefix=FLAGS.code_url_prefix,
+        search_hints=FLAGS.search_hints,
+        site_path=FLAGS.site_path,
+        callbacks=[_filter_class_attributes],
+    )
+
+    return doc_generator.build(output_dir=FLAGS.output_dir)
 
 
 if __name__ == "__main__":
-  app.run(main)
+    app.run(main)
diff --git a/tfx_bsl/tools/docker_build/manylinux2010-bazel/Dockerfile.manylinux2010-bazel b/tfx_bsl/tools/docker_build/manylinux2010-bazel/Dockerfile.manylinux2010-bazel
index 17fab07f..72f65b61 100644
--- a/tfx_bsl/tools/docker_build/manylinux2010-bazel/Dockerfile.manylinux2010-bazel
+++ b/tfx_bsl/tools/docker_build/manylinux2010-bazel/Dockerfile.manylinux2010-bazel
@@ -26,4 +26,4 @@ ENV PATH="/opt/python/cp39-cp39m/bin:${PATH}"
 ENV EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk"
 ENV BAZEL_LINKLIBS=-lstdc++:-lm
 RUN wget "https://github.com/bazelbuild/bazel/releases/download/6.5.0/bazel-6.5.0-installer-linux-x86_64.sh" && \
-  /bin/bash ./bazel-6.5.0-installer-linux-x86_64.sh
\ No newline at end of file
+  /bin/bash ./bazel-6.5.0-installer-linux-x86_64.sh
diff --git a/tfx_bsl/types/common_types.py b/tfx_bsl/types/common_types.py
index 78785deb..bcf05664 100644
--- a/tfx_bsl/types/common_types.py
+++ b/tfx_bsl/types/common_types.py
@@ -18,12 +18,14 @@
 import numpy as np
 import tensorflow as tf
 
-FeatureSpecType = Union[tf.io.FixedLenFeature, tf.io.VarLenFeature,
-                        tf.io.SparseFeature, tf.io.RaggedFeature]
+FeatureSpecType = Union[
+    tf.io.FixedLenFeature, tf.io.VarLenFeature, tf.io.SparseFeature, tf.io.RaggedFeature
+]
 
 TensorType = Union[tf.Tensor, tf.SparseTensor, tf.RaggedTensor]
 
-TensorValueType = Union[np.ndarray, tf.compat.v1.SparseTensorValue,
-                        tf.compat.v1.ragged.RaggedTensorValue]
+TensorValueType = Union[
+    np.ndarray, tf.compat.v1.SparseTensorValue, tf.compat.v1.ragged.RaggedTensorValue
+]
 
 TensorAlike = Union[TensorType, TensorValueType]
diff --git a/tfx_bsl/types/tfx_namedtuple.py b/tfx_bsl/types/tfx_namedtuple.py
index 6b5a004a..f516d885 100644
--- a/tfx_bsl/types/tfx_namedtuple.py
+++ b/tfx_bsl/types/tfx_namedtuple.py
@@ -22,41 +22,42 @@
 TODO(https://issues.apache.org/jira/browse/SPARK-22674): remove this workaround
 once the hack is removed from PySpark.
 """
+
 import collections
 import sys
 import typing
 
 
 def _patch_namedtuple(cls):
-  """Helper function that patches namedtuple class to prevent PySpark hack."""
+    """Helper function that patches namedtuple class to prevent PySpark hack."""
 
-  def reduce(self):
-    return (self.__class__, tuple(self))
+    def reduce(self):
+        return (self.__class__, tuple(self))
 
-  # Classes for which `__reduce__` is defined don't get hacked by PySpark.
-  cls.__reduce__ = reduce
+    # Classes for which `__reduce__` is defined don't get hacked by PySpark.
+    cls.__reduce__ = reduce
 
-  # For pickling to work, the __module__ variable needs to be set to the frame
-  # where the named tuple is created.
-  try:
-    cls.__module__ = sys._getframe(2).f_globals.get('__name__', '__main__')  # pylint: disable=protected-access
-  except (AttributeError, ValueError):
-    pass
+    # For pickling to work, the __module__ variable needs to be set to the frame
+    # where the named tuple is created.
+    try:
+        cls.__module__ = sys._getframe(2).f_globals.get("__name__", "__main__")  # pylint: disable=protected-access
+    except (AttributeError, ValueError):
+        pass
 
 
 def namedtuple(typename, field_names, *, rename=False):
-  """Wrapper around `collections.namedtuple` to provide PySpark compatibility."""
-
-  result = collections.namedtuple(typename, field_names, rename=rename)
-  _patch_namedtuple(result)
-  return result
+    """Wrapper around `collections.namedtuple` to provide PySpark compatibility."""
+    result = collections.namedtuple(typename, field_names, rename=rename)
+    _patch_namedtuple(result)
+    return result
 
 
 class TypedNamedTuple:
-  """Wrapper around `typing.NamedTuple` to provide PySpark compatibility."""
-  __slots__ = ()
+    """Wrapper around `typing.NamedTuple` to provide PySpark compatibility."""
 
-  def __new__(cls, typename, fields=None):
-    result = typing.NamedTuple(typename, fields)
-    _patch_namedtuple(result)
-    return result
+    __slots__ = ()
+
+    def __new__(cls, typename, fields=None):
+        result = typing.NamedTuple(typename, fields)
+        _patch_namedtuple(result)
+        return result
diff --git a/tfx_bsl/types/tfx_namedtuple_test.py b/tfx_bsl/types/tfx_namedtuple_test.py
index 9f688af6..03e90036 100644
--- a/tfx_bsl/types/tfx_namedtuple_test.py
+++ b/tfx_bsl/types/tfx_namedtuple_test.py
@@ -12,122 +12,126 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Tests for tfx_bsl.types.tfx_namedtuple."""
+
 import collections
 import pickle
-import pytest
 import sys
 import typing
 
-from tfx_bsl.types import tfx_namedtuple
-
+import pytest
 from absl.testing import absltest
 
+from tfx_bsl.types import tfx_namedtuple
+
 _cls = {}
-_TFXNamedTuple = tfx_namedtuple.namedtuple('_TFXNamedTuple', ['attrib'])
-_TFXTypedNamedTuple = tfx_namedtuple.TypedNamedTuple('_TFXTypedNamedTuple',
-                                                     [('attrib', int)])
-_NamedTuple = collections.namedtuple('_NamedTuple', ['attrib'])
-_TypedNamedTuple = typing.NamedTuple('_TypedNamedTuple', [('attrib', int)])
+_TFXNamedTuple = tfx_namedtuple.namedtuple("_TFXNamedTuple", ["attrib"])
+_TFXTypedNamedTuple = tfx_namedtuple.TypedNamedTuple(
+    "_TFXTypedNamedTuple", [("attrib", int)]
+)
+_NamedTuple = collections.namedtuple("_NamedTuple", ["attrib"])
 
 
-class _TFXNamedTupleSubclass(_TFXNamedTuple):
+class _TypedNamedTuple(typing.NamedTuple):
+    attrib: int
+
 
-  def __str__(self):
-    return 'hi, I am _TFXNamedTupleSubclass'
+class _TFXNamedTupleSubclass(_TFXNamedTuple):
+    def __str__(self):
+        return "hi, I am _TFXNamedTupleSubclass"
 
 
 class _TFXTypedNamedTupleSubclass(_TFXTypedNamedTuple):
-
-  def __str__(self):
-    return 'hi, I am _TFXTypedNamedTupleSubclass'
+    def __str__(self):
+        return "hi, I am _TFXTypedNamedTupleSubclass"
 
 
 class _NamedTupleSubclass(_NamedTuple):
-
-  def __str__(self):
-    return 'hi, I am _NamedTupleSubclass'
+    def __str__(self):
+        return "hi, I am _NamedTupleSubclass"
 
 
 class _TypedNamedTupleSubclass(_TypedNamedTuple):
-
-  def __str__(self):
-    return 'hi, I am _TypedNamedTupleSubclass'
+    def __str__(self):
+        return "hi, I am _TypedNamedTupleSubclass"
 
 
 def _restore(name, fields, value):
-  """Restore an object of namedtuple."""
-  key = (name, fields)
-  cls = _cls.get(key)
-  if cls is None:
-    cls = collections.namedtuple(name, fields)
-    _cls[key] = cls
-  return cls(*value)
+    """Restore an object of namedtuple."""
+    key = (name, fields)
+    cls = _cls.get(key)
+    if cls is None:
+        cls = collections.namedtuple(name, fields)
+        _cls[key] = cls
+    return cls(*value)
 
 
 def _hack_namedtuple(cls):
-  """Make class generated by namedtuple picklable."""
-  name = cls.__name__
-  fields = cls._fields
+    """Make class generated by namedtuple picklable."""
+    name = cls.__name__
+    fields = cls._fields
 
-  def reduce(self):
-    return (_restore, (name, fields, tuple(self)))
+    def reduce(self):
+        return (_restore, (name, fields, tuple(self)))
 
-  cls.__reduce__ = reduce
-  cls._is_namedtuple_ = True
-  return cls
+    cls.__reduce__ = reduce
+    cls._is_namedtuple_ = True
+    return cls
 
 
 @pytest.mark.xfail(run=False, reason="This test fails and needs to be fixed.")
 class TFXNamedtupleTest(absltest.TestCase):
-
-  def testPickling(self):
-    tfx_namedtuple_child = _TFXNamedTupleSubclass(1)
-    tfx_typed_namedtuple_child = _TFXTypedNamedTupleSubclass(2)
-    namedtuple_child = _NamedTupleSubclass(3)
-    typed_namedtuple_child = _TypedNamedTupleSubclass(4)
-
-    # This emulates hack that is done in PySpark.
-    for obj in sys.modules['__main__'].__dict__.values():
-      if (isinstance(obj, type) and obj.__base__ is tuple and
-          hasattr(obj, '_fields') and '__reduce__' not in obj.__dict__):
-        _hack_namedtuple(obj)  # hack inplace
-
-    # Subclasses are preserved.
-    self.assertIsInstance(tfx_namedtuple_child, _TFXNamedTupleSubclass)
-    self.assertEqual(
-        str(tfx_namedtuple_child), 'hi, I am _TFXNamedTupleSubclass')
-    self.assertIsInstance(tfx_typed_namedtuple_child,
-                          _TFXTypedNamedTupleSubclass)
-    self.assertEqual(
-        str(tfx_typed_namedtuple_child), 'hi, I am _TFXTypedNamedTupleSubclass')
-    self.assertIsInstance(namedtuple_child, _NamedTupleSubclass)
-    self.assertEqual(str(namedtuple_child), 'hi, I am _NamedTupleSubclass')
-    self.assertIsInstance(typed_namedtuple_child, _TypedNamedTupleSubclass)
-    self.assertEqual(
-        str(typed_namedtuple_child), 'hi, I am _TypedNamedTupleSubclass')
-
-    # Perform pickling.
-    tfx_namedtuple_child = pickle.loads(pickle.dumps(tfx_namedtuple_child))
-    tfx_typed_namedtuple_child = pickle.loads(
-        pickle.dumps(tfx_typed_namedtuple_child))
-    namedtuple_child = pickle.loads(pickle.dumps(namedtuple_child))
-    typed_namedtuple_child = pickle.loads(pickle.dumps(typed_namedtuple_child))
-
-    # `_TFXNamedTupleSubclass` and `_TFXTypedNamedTupleSubclass` are preserved,
-    # but the `_NamedTupleSubclass` and `_TypedNamedTupleSubclass` instances
-    # are pickled and unpickled as their parent classes.
-    self.assertIsInstance(tfx_namedtuple_child, _TFXNamedTupleSubclass)
-    self.assertEqual(
-        str(tfx_namedtuple_child), 'hi, I am _TFXNamedTupleSubclass')
-    self.assertIsInstance(tfx_typed_namedtuple_child,
-                          _TFXTypedNamedTupleSubclass)
-    self.assertEqual(
-        str(tfx_typed_namedtuple_child), 'hi, I am _TFXTypedNamedTupleSubclass')
-    self.assertNotIsInstance(namedtuple_child, _NamedTupleSubclass)
-    self.assertEqual(str(namedtuple_child), '_NamedTuple(attrib=3)')
-    self.assertNotIsInstance(typed_namedtuple_child, _TypedNamedTupleSubclass)
-    self.assertEqual(str(typed_namedtuple_child), '_TypedNamedTuple(attrib=4)')
-
-
-if __name__ == '__main__':
-  absltest.main()
+    def testPickling(self):
+        tfx_namedtuple_child = _TFXNamedTupleSubclass(1)
+        tfx_typed_namedtuple_child = _TFXTypedNamedTupleSubclass(2)
+        namedtuple_child = _NamedTupleSubclass(3)
+        typed_namedtuple_child = _TypedNamedTupleSubclass(4)
+
+        # This emulates hack that is done in PySpark.
+        for obj in sys.modules["__main__"].__dict__.values():
+            if (
+                isinstance(obj, type)
+                and obj.__base__ is tuple
+                and hasattr(obj, "_fields")
+                and "__reduce__" not in obj.__dict__
+            ):
+                _hack_namedtuple(obj)  # hack inplace
+
+        # Subclasses are preserved.
+        self.assertIsInstance(tfx_namedtuple_child, _TFXNamedTupleSubclass)
+        self.assertEqual(str(tfx_namedtuple_child), "hi, I am _TFXNamedTupleSubclass")
+        self.assertIsInstance(tfx_typed_namedtuple_child, _TFXTypedNamedTupleSubclass)
+        self.assertEqual(
+            str(tfx_typed_namedtuple_child), "hi, I am _TFXTypedNamedTupleSubclass"
+        )
+        self.assertIsInstance(namedtuple_child, _NamedTupleSubclass)
+        self.assertEqual(str(namedtuple_child), "hi, I am _NamedTupleSubclass")
+        self.assertIsInstance(typed_namedtuple_child, _TypedNamedTupleSubclass)
+        self.assertEqual(
+            str(typed_namedtuple_child), "hi, I am _TypedNamedTupleSubclass"
+        )
+
+        # Perform pickling.
+        tfx_namedtuple_child = pickle.loads(pickle.dumps(tfx_namedtuple_child))
+        tfx_typed_namedtuple_child = pickle.loads(
+            pickle.dumps(tfx_typed_namedtuple_child)
+        )
+        namedtuple_child = pickle.loads(pickle.dumps(namedtuple_child))
+        typed_namedtuple_child = pickle.loads(pickle.dumps(typed_namedtuple_child))
+
+        # `_TFXNamedTupleSubclass` and `_TFXTypedNamedTupleSubclass` are preserved,
+        # but the `_NamedTupleSubclass` and `_TypedNamedTupleSubclass` instances
+        # are pickled and unpickled as their parent classes.
+        self.assertIsInstance(tfx_namedtuple_child, _TFXNamedTupleSubclass)
+        self.assertEqual(str(tfx_namedtuple_child), "hi, I am _TFXNamedTupleSubclass")
+        self.assertIsInstance(tfx_typed_namedtuple_child, _TFXTypedNamedTupleSubclass)
+        self.assertEqual(
+            str(tfx_typed_namedtuple_child), "hi, I am _TFXTypedNamedTupleSubclass"
+        )
+        self.assertNotIsInstance(namedtuple_child, _NamedTupleSubclass)
+        self.assertEqual(str(namedtuple_child), "_NamedTuple(attrib=3)")
+        self.assertNotIsInstance(typed_namedtuple_child, _TypedNamedTupleSubclass)
+        self.assertEqual(str(typed_namedtuple_child), "_TypedNamedTuple(attrib=4)")
+
+
+if __name__ == "__main__":
+    absltest.main()
diff --git a/tfx_bsl/types_compat.py b/tfx_bsl/types_compat.py
index 91a09125..fb7c802f 100644
--- a/tfx_bsl/types_compat.py
+++ b/tfx_bsl/types_compat.py
@@ -22,7 +22,19 @@
 #     should be removed by copybara and/or pystrip, except for the import line
 #     which will be dealt by note 1.
 
-from apache_beam.typehints import Any, Dict, Generator, Iterable, Iterator, List, Optional, Set, Tuple, TypeVariable, Union  # pylint: disable=unused-import,g-multiple-import
+from apache_beam.typehints import (  # pylint: disable=unused-import,g-multiple-import
+    Any,
+    Dict,
+    Generator,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    TypeVariable,
+    Union,
+)
 
 # pylint: disable=invalid-name
 Callable = None
diff --git a/tfx_bsl/version.py b/tfx_bsl/version.py
index e84d2e9c..7ab5f2e8 100644
--- a/tfx_bsl/version.py
+++ b/tfx_bsl/version.py
@@ -14,4 +14,4 @@
 """Contains the version string of tfx_bsl."""
 
 # Note that setup.py uses this version.
-__version__ = '1.18.0.dev'
+__version__ = "1.18.0.dev"