Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions .github/workflows/execute-notebooks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Executes the selected notebooks end-to-end with papermill so that broken
# notebooks are caught on pull requests and on pushes to main.
name: Execute Notebooks

on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - "notebooks/**/*.ipynb"
      - ".github/workflows/execute-notebooks.yml"
  push:
    branches: [main]
    paths:
      - "notebooks/**/*.ipynb"
      - ".github/workflows/execute-notebooks.yml"
  # Allow manual triggering
  workflow_dispatch:

permissions:
  contents: read

jobs:
  execute_tests:
    runs-on: ubuntu-latest
    strategy:
      # The job runs once per (notebook, file) combination of the two lists.
      matrix:
        # Set the notebooks to execute
        notebook_to_execute:
          - "notebooks/use-cases/document-conversion-standard.ipynb"

        # Set the files to use in each notebook execution
        file_to_use:
          - "https://raw.githubusercontent.com/py-pdf/sample-files/refs/heads/main/001-trivial/minimal-document.pdf"
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip

      - name: Install Testing Tools
        run: |
          pip install papermill ipykernel
          ipython kernel install --name "python3" --user

      - name: Execute Notebooks
        # Hand the matrix values to the script through the environment instead
        # of expanding them inside the script text, so their content can never
        # be interpreted as shell syntax.
        env:
          NOTEBOOK: ${{ matrix.notebook_to_execute }}
          FILE: ${{ matrix.file_to_use }}
        run: |
          # -e/-o pipefail: stop on the first failing command, including a
          # failure inside the base64 pipeline; -u: unset variables are errors;
          # -x: trace commands in the job log.
          set -euxo pipefail

          echo "Executing notebook '$NOTEBOOK' with file '$FILE'..."

          # papermill's -b option takes the parameters as base64-encoded YAML.
          PARAMS_B64="$(printf 'files: ["%s"]' "$FILE" | base64 -w 0)"

          # Quote every expansion so paths containing spaces stay one argument.
          papermill "$NOTEBOOK" "$NOTEBOOK.tmp.ipynb" -b "$PARAMS_B64"

          echo "✓ Notebook $NOTEBOOK executed successfully"
43 changes: 43 additions & 0 deletions .github/workflows/validate-notebooks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Static checks for notebooks: formatting and presence of the papermill
# parameters cell. Runs on pull requests and on pushes to main.
name: Validate Notebooks

on:
  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - "notebooks/**/*.ipynb"
      - ".github/workflows/validate-notebooks.yml"
  push:
    branches: [main]
    paths:
      - "notebooks/**/*.ipynb"
      - ".github/workflows/validate-notebooks.yml"

permissions:
  contents: read

jobs:
  validate_tests:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Set the notebooks to validate, wildcards are allowed
        # NOTE(review): no step in this job reads matrix.notebooks_to_validate,
        # so editing this list does not currently change what gets validated;
        # the make targets below choose the notebooks themselves.
        notebooks_to_validate: ["notebooks/**/*.ipynb"]
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.12"
          cache: pip

      - name: Install Testing Tools
        run: |
          # Quote the extras spec so the shell cannot treat [test] as a glob.
          pip install -e ".[test]"
          ipython kernel install --name "python3" --user

      - name: Run Formatting Tests
        run: make format-notebooks-check

      - name: Validate Notebook Parameters
        run: make test-notebook-parameters
7 changes: 6 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.PHONY: format-python format-notebook format-python-check format-notebooks-check
.PHONY: format-python format-notebook format-python-check format-notebooks-check test-notebook-parameters

USE_CASES := $(wildcard notebooks/use-cases/*.ipynb)
TUTORIALS := $(wildcard notebooks/tutorials/*.ipynb)
Expand All @@ -24,3 +24,8 @@ format-notebooks-check:
# Check that the files in ALL_PYTHON_FILES are already ruff-formatted.
# `ruff format --check` reports files that would change instead of rewriting
# them; the success message is only printed when ruff exits successfully.
format-python-check:
ruff format --check $(ALL_PYTHON_FILES)
@echo "ruff format check passed :)"

# Run the pytest module tests/test_notebook_parameters.py in verbose mode.
# The closing message is only printed when pytest exits successfully,
# because make stops at the first failing recipe line.
test-notebook-parameters:
@echo "Running notebook parameters validation..."
pytest tests/test_notebook_parameters.py -v
@echo "Notebook parameters test passed :)"
5 changes: 5 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,8 @@ lint.select = ["E","F","I","B","W","UP"]
# UP007 keep typing.Dict instead of converting to dict (PEP 585)
# UP035 keep typing.Optional instead of converting to | None (PEP 604)
lint.ignore = ["E203","E501","UP006","UP007","UP035"]

# pytest settings, read when pytest is started from the directory holding this file.
[tool.pytest.ini_options]
# Directories searched for tests when pytest is run without path arguments.
testpaths = ["tests"]
# File name patterns that are collected as test modules.
python_files = ["test_*.py"]
# Options added to every pytest invocation: short tracebacks, verbose output.
addopts = ["--tb=short", "-v"]
3 changes: 3 additions & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,5 @@
ruff
nbstripout
pytest
nbformat
papermill
58 changes: 58 additions & 0 deletions tests/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Tests

This directory contains automated tests for the ODH Data Processing project.

## Overview

Tests validate project components including notebooks, Python modules, and configurations to ensure they work correctly in development and CI/CD environments.

## Current Tests

- **`test_notebook_parameters.py`** - Validates notebooks have required parameters cells for papermill execution
- **`conftest.py`** - Shared pytest configuration and helpers used by the tests (not a test module itself)

## Running Tests

```bash
# Run all tests
pytest tests/ -v

# Run specific test file
pytest tests/test_notebook_parameters.py -v

# Run via Makefile (where available)
make test-notebook-parameters
```

Tests also run automatically in CI/CD via GitHub Actions workflows.

## Setup

Install dependencies:
```bash
pip install -r requirements-dev.txt
```

## Adding New Tests

1. Create new test files following `test_*.py` naming convention
2. Add shared utilities to `conftest.py` if needed
3. Update this README to document new test categories
4. Add Makefile targets for convenient test execution

## Configuration

Test configuration is in `pyproject.toml`:
```toml
[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
addopts = ["--tb=short", "-v"]
```

## Troubleshooting

Common issues:
- **Test discovery**: Run from project root where `pyproject.toml` exists
- **Import errors**: Install dependencies with `pip install -r requirements-dev.txt`
- **Test failures**: Check error messages for specific validation requirements
25 changes: 25 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""
Shared pytest configuration and fixtures for notebook testing.
"""

import glob
from pathlib import Path

import pytest


def get_notebook_files():
    """Discover all notebook files under the ``notebooks`` directory.

    The search pattern is relative, so it is resolved against the current
    working directory.

    Returns:
        list[Path]: Paths of the ``*.ipynb`` files found at any depth below
        ``notebooks/``, in sorted order so that every run and platform sees
        the notebooks in the same sequence.
    """
    notebook_pattern = "notebooks/**/*.ipynb"

    # glob returns matches in arbitrary order; sorting keeps the order of the
    # parametrized tests built from this list reproducible.
    matches = sorted(glob.glob(notebook_pattern, recursive=True))

    # Convert to Path objects and drop any match that does not exist
    return [Path(match) for match in matches if Path(match).exists()]


@pytest.fixture
def notebook_files():
    """Pytest fixture exposing the notebook paths found by get_notebook_files()."""
    discovered = get_notebook_files()
    return discovered
107 changes: 107 additions & 0 deletions tests/test_notebook_parameters.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Test notebook parameters cell validation.

This module tests that all notebooks have the required parameters cell
that is needed for papermill execution.
"""

from pathlib import Path

import nbformat
import pytest

from conftest import get_notebook_files


class NotebookParametersValidator:
    """Checks notebooks for a code cell tagged 'parameters'."""

    def validate_parameters_cell(self, notebook_path: Path) -> bool:
        """
        Report whether a notebook contains a code cell tagged 'parameters'.

        Args:
            notebook_path: Location of the notebook file to inspect

        Returns:
            True when at least one code cell carries the 'parameters' tag,
            False when none does

        Raises:
            Exception: When reading, format validation, or cell inspection
                fails; the underlying error is chained as the cause
        """
        try:
            # NO_CONVERT: read the notebook without converting its format version
            nb = nbformat.read(notebook_path, nbformat.NO_CONVERT)

            # Reject structurally invalid notebooks before inspecting cells
            nbformat.validate(nb)

            # Only code cells count; a cell without a 'tags' entry is skipped
            return any(
                cell.cell_type == 'code'
                and 'tags' in cell.metadata
                and 'parameters' in cell.metadata.tags
                for cell in nb.cells
            )

        except Exception as e:
            raise Exception(f"Failed to validate notebook {notebook_path}: {str(e)}") from e


# ids=str labels each generated test with the notebook's path, so a failure
# report names the offending notebook directly.
@pytest.mark.parametrize("notebook_path", get_notebook_files(), ids=str)
def test_notebook_has_parameters_cell(notebook_path):
    """
    Test that each notebook has at least one code cell tagged with 'parameters'.

    This is required for papermill execution in the CI/CD pipeline.

    Args:
        notebook_path: Path of one discovered notebook, supplied by the
            parametrization above (one test per notebook)
    """
    validator = NotebookParametersValidator()

    has_parameters = validator.validate_parameters_cell(notebook_path)

    assert has_parameters, (
        f"Notebook '{notebook_path}' does not have any code cell tagged with 'parameters'. "
        f"Please add a code cell with metadata tag 'parameters' for papermill execution."
    )


def test_validator_itself(tmp_path):
    """
    Test the validator logic against small notebooks written to disk.

    The notebooks are created under pytest's per-test temporary directory
    (``tmp_path``), so nothing is written into the working tree and no manual
    clean-up is needed, even when a write or an assertion fails.

    Args:
        tmp_path: pytest built-in fixture providing a unique temporary directory
    """
    validator = NotebookParametersValidator()

    # Start with a notebook that only holds a regular code cell
    test_notebook = nbformat.v4.new_notebook()
    test_notebook.cells.append(nbformat.v4.new_code_cell("x = 1"))

    # Should fail - no parameters cell yet
    no_params_path = tmp_path / "test_notebook_no_params.ipynb"
    with open(no_params_path, "w", encoding="utf-8") as f:
        nbformat.write(test_notebook, f)

    assert not validator.validate_parameters_cell(no_params_path)

    # Add a parameters cell
    params_cell = nbformat.v4.new_code_cell("# Parameters cell\nfiles = []")
    params_cell.metadata["tags"] = ["parameters"]
    test_notebook.cells.append(params_cell)

    # Should pass - has parameters cell
    with_params_path = tmp_path / "test_notebook_with_params.ipynb"
    with open(with_params_path, "w", encoding="utf-8") as f:
        nbformat.write(test_notebook, f)

    assert validator.validate_parameters_cell(with_params_path)
Loading