diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 7e97e1c0e..28a267517 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -21,6 +21,7 @@ jobs:
     outputs:
       related: ${{ steps.filter.outputs.related }}
       ragas: ${{ steps.filter.outputs.ragas }}
+      ragas_experimental: ${{ steps.filter.outputs.ragas_experimental }}
       docs: ${{ steps.filter.outputs.docs }}
     steps:
       - uses: actions/checkout@v4
@@ -35,12 +36,16 @@ jobs:
               - codecov.yml
               - pyproject.toml
               - requirements/test.txt
+              - Makefile
             ragas:
               - *related
               - "ragas/src/ragas/**"
               - "ragas/tests/**"
             ragas_experimental:
+              - *related
               - "experimental/ragas_experimental/**"
+              - "experimental/tests/**"
+              - "experimental/pyproject.toml"
             docs:
               - *related
               - requirements/docs-requirements.txt
@@ -53,11 +58,24 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, macos-latest, windows-latest]
-        python-version: ["3.9", "3.10", "3.11", "3.12"]
+        include:
+          # Critical path: Latest + oldest Python on Ubuntu (full test suite)
+          - os: ubuntu-latest
+            python-version: "3.9"
+            test-type: "full"
+          - os: ubuntu-latest
+            python-version: "3.12"
+            test-type: "full"
+          # Cross-platform validation (essential tests only)
+          - os: macos-latest
+            python-version: "3.11"
+            test-type: "essential"
+          - os: windows-latest
+            python-version: "3.10"
+            test-type: "essential"
 
     if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.ragas == 'true') || github.event_name == 'push' }}
-    name: python${{ matrix.python-version }}_unit_tests (${{ matrix.os }})
+    name: python${{ matrix.python-version }}_unit_tests (${{ matrix.os }}, ${{ matrix.test-type }})
     runs-on: ${{ matrix.os }}
 
     steps:
@@ -71,80 +89,169 @@ jobs:
           python-version: ${{ matrix.python-version }}
           architecture: ${{ matrix.os == 'macos-latest' && 'arm64' || 'x64' }}
 
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
       - name: Get pip cache dir
         id: cache-dir
         run: |
           echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
 
-      - name: Cache pip dependencies
-        uses: actions/cache@v3
-        id: cache-pip
+      - name: Cache dependencies (UV cache)
+        uses: actions/cache@v4
+        id: cache-deps
         with:
-          path: ${{ steps.cache-dir.outputs.dir }}
-          key: ${{ runner.os }}-tests-${{ hashFiles('requirements/test.txt') }}
+          path: |
+            ${{ steps.cache-dir.outputs.dir }}
+            ~/.cache/uv
+          key: deps-${{ runner.os }}-py${{ matrix.python-version }}-${{ hashFiles('ragas/pyproject.toml', 'requirements/*.txt') }}
+          restore-keys: |
+            deps-${{ runner.os }}-py${{ matrix.python-version }}-
+            deps-${{ runner.os }}-py3.11-
+            deps-${{ runner.os }}-
 
       - name: Install dependencies
         run: |
-          pip install "./ragas"
-          pip install -r requirements/test.txt
-
+          # Use UV with system installation for CI (simpler and more reliable)
+          uv pip install --system -e "./ragas[dev]" --cache-dir ~/.cache/uv
 
       - name: Run unit tests
         run: |
-          # OPTS=(--cov-config pyproject.toml --cov=src/bentoml --cov-append)
+          # Configure test options based on OS and test type
           if [ "${{ matrix.os }}" != 'windows-latest' ]; then
-            # we will use pytest-xdist to improve tests run-time.
+            # Use pytest-xdist to improve test run-time on Linux/macOS
             OPTS=(--dist loadfile -n auto)
           fi
-          # Now run the unit tests
-          pytest --nbmake ragas/tests/unit "${OPTS[@]}"
+          
+          # Run different test suites based on test type
+          if [ "${{ matrix.test-type }}" = "full" ]; then
+            # Full test suite with notebook tests
+            cd ragas && pytest --nbmake tests/unit "${OPTS[@]}"
+          else
+            # Essential tests only (faster for cross-platform validation)
+            cd ragas && pytest tests/unit -k "not slow" "${OPTS[@]}"
+          fi
         env:
           __RAGAS_DEBUG_TRACKING: true
           RAGAS_DO_NOT_TRACK: true
 
-  codestyle_check:
-    runs-on: ubuntu-latest
+  experimental_tests:
     needs:
       - diff
 
-    if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.ragas == 'true') || github.event_name == 'push' }}
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          # Focus on stable Python versions for experimental features
+          - os: ubuntu-latest
+            python-version: "3.11"
+            test-type: "full"
+          - os: ubuntu-latest
+            python-version: "3.12"
+            test-type: "full"
+
+    if: ${{ (github.event_name == 'pull_request' && needs.diff.outputs.ragas_experimental == 'true') || github.event_name == 'push' }}
+    name: python${{ matrix.python-version }}_experimental_tests (${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
 
     steps:
       - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0 # fetch all tags and branches
 
       - name: Setup python
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
-          python-version: "3.10.6"
+          python-version: ${{ matrix.python-version }}
           architecture: x64
 
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
       - name: Get pip cache dir
         id: cache-dir
         run: |
           echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
 
-      - name: Fetch origin
-        run: git fetch origin "$GITHUB_BASE_REF"
+      - name: Cache dependencies (UV cache)
+        uses: actions/cache@v4
+        id: cache-deps
+        with:
+          path: |
+            ${{ steps.cache-dir.outputs.dir }}
+            ~/.cache/uv
+          key: deps-${{ runner.os }}-py${{ matrix.python-version }}-exp-${{ hashFiles('ragas/pyproject.toml', 'experimental/pyproject.toml') }}
+          restore-keys: |
+            deps-${{ runner.os }}-py${{ matrix.python-version }}-exp-
+            deps-${{ runner.os }}-py${{ matrix.python-version }}-
+            deps-${{ runner.os }}-py3.11-
+            deps-${{ runner.os }}-
+
+      - name: Install dependencies
+        run: |
+          # Use UV with system installation for CI (simpler and more reliable)
+          uv pip install --system -e "./ragas[dev]" --cache-dir ~/.cache/uv
+          uv pip install --system -e "./experimental[dev]" --cache-dir ~/.cache/uv
+
+      - name: Run experimental tests
+        run: |
+          cd experimental && pytest -v --tb=short
+        env:
+          __RAGAS_DEBUG_TRACKING: true
+          RAGAS_DO_NOT_TRACK: true
+
+  code_quality_check:
+    runs-on: ubuntu-latest
+    needs:
+      - diff
+
+    if: ${{ (github.event_name == 'pull_request' && (needs.diff.outputs.ragas == 'true' || needs.diff.outputs.ragas_experimental == 'true')) || github.event_name == 'push' }}
+
+    steps:
+      - uses: actions/checkout@v4
 
-      - name: Setup node
-        uses: actions/setup-node@v3
+      - name: Setup python
+        uses: actions/setup-python@v5
         with:
-          node-version: "17"
+          python-version: "3.11"
+          architecture: x64
 
-      - name: Cache pip dependencies
-        uses: actions/cache@v3
-        id: cache-pip
+      - name: Install uv
+        uses: astral-sh/setup-uv@v4
+
+      - name: Get pip cache dir
+        id: cache-dir
+        run: |
+          echo "dir=$(pip cache dir)" >> $GITHUB_OUTPUT
+
+      - name: Cache dependencies (UV cache)
+        uses: actions/cache@v4
+        id: cache-deps
         with:
-          path: ${{ steps.cache-dir.outputs.dir }}
-          key: codestyle-${{ hashFiles('requirements/dev.txt') }}
+          path: |
+            ${{ steps.cache-dir.outputs.dir }}
+            ~/.cache/uv
+          key: deps-ubuntu-py3.11-codestyle-${{ hashFiles('ragas/pyproject.toml', 'experimental/pyproject.toml', 'requirements/*.txt') }}
+          restore-keys: |
+            deps-ubuntu-py3.11-codestyle-
+            deps-ubuntu-py3.11-
+            deps-ubuntu-
 
       - name: Install dependencies
         run: |
-          pip install ./ragas
-          pip install -r requirements/dev.txt
+          # Use UV with system installation for CI (simpler and more reliable)
+          uv pip install --system -e "./ragas[dev]" --cache-dir ~/.cache/uv
+          uv pip install --system -e "./experimental[dev]" --cache-dir ~/.cache/uv
+
+      - name: Format check (dry run)
+        run: |
+          # Check if code is properly formatted (without making changes)
+          echo "Checking ragas formatting..."
+          black --check --config ragas/pyproject.toml ragas/src ragas/tests docs
+          ruff check ragas/src docs ragas/tests
+          echo "Checking experimental formatting..."
+          cd experimental && black --check ragas_experimental && ruff check ragas_experimental
 
-      - name: Lint check
-        run: make lint
       - name: Type check
-        if: ${{ github.event_name == 'pull_request' }}
         run: make type
diff --git a/.gitignore b/.gitignore
index d57f3345c..dd57931b7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -177,3 +177,4 @@ uv.lock
 # nbdev
 _proc/
 site/
+**/old_nbs/*.md
diff --git a/CLAUDE.md b/CLAUDE.md
index 825cd8f0d..41d0cc3b9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -191,4 +191,5 @@ analytics_logger.addHandler(console_handler)
 
 ## Memories
 
-- whenever you create such docs put in in /experiments because that is gitignored and you can use it as a scratchpad or tmp directory for storing these
\ No newline at end of file
+- whenever you create such docs put in in /experiments because that is gitignored and you can use it as a scratchpad or tmp directory for storing these
+- always use uv to run python and python related commandline tools like isort, ruff, pyright ect. This is because we are using uv to manage the .venv and dependencies.
\ No newline at end of file
diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md
index ccc2d4f6f..65324e500 100644
--- a/DEVELOPMENT.md
+++ b/DEVELOPMENT.md
@@ -1,231 +1,362 @@
-# Development Guide for ragas
+# Development Guide for Ragas Monorepo
 
-This document provides guidelines for developing and contributing to the ragas project.
+This comprehensive guide covers development workflows for the Ragas monorepo, designed for both human developers and AI agents.
 
-## Monorepo Structure
+## Quick Start (for Developers)
 
-This repository is organized as a monorepo containing multiple projects:
+```bash
+# 1. Clone and enter the repository
+git clone https://github.com/explodinggradients/ragas.git
+cd ragas
+
+# 2. Install uv (if not already installed)
+curl -LsSf https://astral.sh/uv/install.sh | sh
 
-1. **Ragas Core Library** - A comprehensive evaluation toolkit for LLM applications
-2. **Ragas Experimental** - An nbdev-based experimental project for Ragas extensions and UI components
+# 3. Set up development environment
+make setup
 
-The directory structure is as follows:
+# 4. Verify everything works
+make check
 
+# 5. Start developing!
+make help  # See all available commands
 ```
-/
-├── ragas/           # Main ragas project (src/ragas)
-│   ├── src/         # Original source code
-│   ├── tests/       # Original tests
-│   ├── pyproject.toml  # ragas-specific build config
-│
-├── experimental/    # nbdev-based experimental project
-│   ├── nbs/         # Notebooks for nbdev  
-│   ├── ragas_experimental/  # Generated code
-│   ├── pyproject.toml  # experimental-specific config
-│   ├── settings.ini    # nbdev config
-│
-├── docs/            # Combined documentation
-│   ├── main/        # Main ragas docs
-│   ├── experimental/  # Experimental docs (generated by nbdev)
+
+## Quick Start (for AI Agents)
+
+AI agents working with this codebase should use these standardized commands:
+
+```bash
+# Essential commands for AI development
+make help           # See all available targets
+make setup          # Complete environment setup
+make check          # Quick health check (format + type)
+make test           # Run all tests
+make run-ci         # Full CI pipeline locally
+
+# Individual development tasks
+make format         # Format and lint all code
+make type           # Type check all code
+make clean          # Clean generated files
+```
+
+**Key Points for AI Agents:**
+- Always use `make` commands rather than direct tool invocation
+- Use `uv run` prefix for any direct Python tool usage
+- Check `make help` for the complete command reference
+- The CI pipeline uses the same commands as local development
+
+## Monorepo Architecture
+
+This repository is organized as a monorepo containing two main projects:
+
+```
+ragas/
+├── ragas/                          # Core evaluation library
+│   ├── src/ragas/                 # Main source code
+│   ├── tests/                     # Tests (unit, e2e, benchmarks)
+│   └── pyproject.toml             # Dependencies and configuration
 │
-├── scripts/         # Shared build/CI scripts
+├── experimental/                   # Experimental extensions
+│   ├── ragas_experimental/        # Generated Python code
+│   ├── tests/                     # Pytest-based tests
+│   └── pyproject.toml             # Dependencies and configuration
 │
-├── workspace.toml   # Root project config (for dev tools)
-├── Makefile         # Combined build commands
-└── README.md        # Monorepo overview
+├── docs/                          # Combined documentation
+├── .github/workflows/             # CI/CD pipeline
+├── Makefile                       # Unified build commands
+└── CLAUDE.md                      # AI assistant instructions
 ```
 
-## Setting up the Development Environment
+### Project Relationships
+- **Ragas Core**: The main evaluation toolkit for LLM applications
+- **Ragas Experimental**: Extensions for advanced features and UI components
+- **Shared Infrastructure**: Unified CI/CD, documentation, and build system
 
-1. **Fork the Repository**
-   Fork the [ragas repository](https://github.com/explodinggradients/ragas) on GitHub.
+## Development Environment Setup
 
-2. **Clone your Fork**
-   ```
-   git clone https://github.com/YOUR_USERNAME/ragas.git
-   cd ragas
-   ```
+### Prerequisites
+- Python 3.9+ 
+- [uv](https://docs.astral.sh/uv/) (recommended) or pip
+- Git
 
-3. **Set up a Virtual Environment**
-   ```
-   python -m venv venv
-   source venv/bin/activate  # On Windows, use `venv\Scripts\activate`
-   ```
+### Setup Process
 
-4. **Install Dependencies**
-   ```
-   pip install -U setuptools  # Required on newer Python versions (e.g., 3.11)
-   pip install -e ".[dev]"
-   ```
+#### Option 1: Using Make (Recommended)
+```bash
+make setup
+```
 
-## Development Workflow
+#### Option 2: Manual Setup
+```bash
+# Install uv if not available
+curl -LsSf https://astral.sh/uv/install.sh | sh
 
-1. **Create a New Branch**
-   ```
-   git checkout -b feature/your-feature-name
-   ```
+# Install both projects
+uv pip install -e "./ragas[dev]"
+uv pip install -e "./experimental[dev]"
+```
 
-2. **Make Changes and Commit**
-   ```
-   git add .
-   git commit -m "Your descriptive commit message"
-   ```
+### Verification
+```bash
+make check  # Runs format + type checking
+make test   # Runs all tests
+```
 
-3. **Push Changes to Your Fork**
-   ```
-   git push origin feature/your-feature-name
-   ```
+## Available Commands Reference
 
-4. **Create a Pull Request**
-   Go to the original ragas repository and create a new pull request from your feature branch.
+Run `make help` to see all targets. Here are the essential commands:
 
-## Monorepo Development
+### Setup & Installation
+- `make install` - Install dependencies for both projects
+- `make setup` - Complete development environment setup
 
-This monorepo uses a unified Makefile to manage both projects. Here are some common commands:
+### Code Quality
+- `make format` - Format and lint all code (includes unused import cleanup)
+- `make type` - Type check all code
+- `make check` - Quick health check (format + type, no tests)
 
-### Setting Up Both Projects
+### Testing
+- `make test` - Run all unit tests
+- `make test-e2e` - Run end-to-end tests
+- `make benchmarks` - Run performance benchmarks
+- `make benchmarks-docker` - Run benchmarks in Docker
 
-Install development dependencies for both projects:
-```bash
-# For ragas core
-pip install -e ".[dev]"
+### CI/Build
+- `make run-ci` - Run complete CI pipeline locally
+- `make clean` - Clean all generated files
 
-# For experimental project
-pip install -e "./experimental[dev]"
-```
+### Documentation
+- `make build-docs` - Build all documentation
+- `make serve-docs` - Serve documentation locally
 
-### Code Quality Commands
+## Development Workflows
 
+### Daily Development
 ```bash
-# Format all code
-make format-all
+# 1. Start your work
+git checkout -b feature/your-feature
 
-# Lint all code
-make lint-all
+# 2. Make changes to code
 
-# Type check all code
-make type-all
+# 3. Check your work
+make check           # Format and type check
+make test            # Run tests
+
+# 4. Commit and push
+git add .
+git commit -m "feat: your feature description"
+git push origin feature/your-feature
+```
+
+### Before Submitting PR
+```bash
+make run-ci          # Run full CI pipeline
+# Ensure all checks pass before creating PR
 ```
 
-You can also work with individual projects:
+### Working with Specific Projects
 
+#### Ragas Core Development
 ```bash
-# For ragas core only
-make format
-make lint
-make type
+# Navigate to the ragas directory for project-specific work
+cd ragas
+uv run pytest tests/unit          # Run specific tests
+uv run pyright src               # Type check specific code
+```
 
-# For experimental only
-make format-experimental
-make lint-experimental
-make type-experimental
+#### Experimental Development
+```bash
+# Navigate to experimental directory
+cd experimental  
+uv run pytest                   # Run experimental tests
 ```
 
-### Testing Commands
+## Testing Strategy
 
-```bash
-# Run all tests
-make test-all
+### Test Types
+1. **Unit Tests**: Fast, isolated tests for individual components
+2. **End-to-End Tests**: Integration tests for complete workflows
+3. **Benchmarks**: Performance tests for evaluation metrics
 
-# Run ragas core tests
+### Running Tests
+```bash
+# All tests
 make test
 
-# Run experimental tests
-make test-experimental
+# Specific test categories
+cd ragas && uv run pytest tests/unit
+cd ragas && uv run pytest tests/e2e  
+cd experimental && uv run pytest
+
+# With coverage or specific options
+cd ragas && uv run pytest tests/unit -k "test_name"
 ```
 
-### Documentation Commands
+### Test Organization
+- **Ragas Core**: `ragas/tests/` (unit, e2e, benchmarks)
+- **Experimental**: `experimental/tests/` (unit, e2e)
 
-```bash
-# Build all documentation
-make build-docsite
+## Code Quality & CI/CD
 
-# Build ragas core docs
-make build-docsite-ragas
+### Code Quality Pipeline
+The `make format` command runs:
+1. **isort**: Import sorting
+2. **black**: Code formatting
+3. **ruff --fix-only**: Auto-fix issues (including unused imports)
+4. **ruff check**: Final linting validation
+
+### Type Checking
+```bash
+make type  # Type check all code with pyright
+```
 
-# Build experimental docs
-make build-docsite-experimental
+### CI/CD Pipeline
+Our GitHub Actions CI runs:
+1. **Dependency Installation**: Using uv for consistent environments
+2. **Code Quality Checks**: Format and type validation
+3. **Testing**: Unit and integration tests across Python 3.9-3.12
+4. **Multi-OS Testing**: Ubuntu, macOS, Windows
 
-# Serve documentation locally
-make serve-docsite
+### Local CI Simulation
+```bash
+make run-ci  # Runs: format + type + test
 ```
 
-### Project-Specific Development
+## Project-Specific Guidelines
 
-#### Ragas Core
-For the main Ragas library, follow the standard development workflow described above.
+### Ragas Core
+- **Language**: Python with type hints
+- **Testing**: pytest with nbmake for notebook tests
+- **Style**: Google-style docstrings
+- **Architecture**: Modular metrics and evaluation framework
 
-#### Experimental Project
-The experimental project uses [nbdev](https://nbdev.fast.ai/) for development:
+### Experimental
+- **Dependencies**: Defined in `pyproject.toml`
+- **Testing**: Pure pytest (no nbdev)
+- **Features**: Advanced evaluation tools and UI components
 
-1. Make changes in the notebook files in `experimental/nbs/`
-2. Run `nbdev_export` to generate Python code
-3. Run tests with `pytest` in the experimental directory
-4. Generate docs with `nbdev_docs`
+### Adding Dependencies
+- **Ragas Core**: Add to `ragas/pyproject.toml`
+- **Experimental**: Add to `experimental/pyproject.toml`
+- **Always**: Test with `make install` and `make test`
 
-When submitting pull requests, please specify which project your changes affect:
-- `[ragas]` for core library changes
-- `[experimental]` for experimental project changes
-- `[monorepo]` for changes that affect the overall repository structure
+## Troubleshooting
 
-## Coding Standards
+### Common Issues
 
-- Follow PEP 8 guidelines for Python code.
-- Use type hints where possible.
-- Write docstrings for all functions, classes, and modules.
-- Ensure all tests pass before submitting a pull request.
+#### Import Errors
+```bash
+# Reinstall in development mode
+make install
+```
 
-You can run the following command to check for code style issues:
+#### Test Failures
 ```bash
-make run-ci
+# Run specific failing test
+cd ragas && uv run pytest tests/unit/test_specific.py -v
+
+# Check test dependencies
+cd experimental && uv run pytest --collect-only
+```
+
+#### Formatting Issues
+```bash
+# Fix formatting
+make format
+
+# Check specific files
+uv run ruff check path/to/file.py --fix
 ```
 
-Adding a `V=1` option makes the output more verbose, showing normally hidden commands, like so:
+#### CI Failures
 ```bash
-make run-ci V=1
+# Run the same checks locally
+make run-ci
+
+# Individual checks
+make format  # Must pass
+make type    # Must pass  
+make test    # Must pass
 ```
 
-## Running Tests
+### Development Environment Issues
 
-To run the test suite:
+#### uv Not Found
+```bash
+# Install uv
+curl -LsSf https://astral.sh/uv/install.sh | sh
+# or use pip: pip install uv
+```
 
+#### Dependency Conflicts
 ```bash
-make test
+# Clean install
+make clean
+make install
 ```
 
-## Documentation
+### Getting Help
+- **Documentation**: Check `CLAUDE.md` for AI assistant guidance
+- **Commands**: Run `make help` for all available targets
+- **Issues**: Check existing GitHub issues or create a new one
 
-- Update documentation for any new features or changes to existing functionality.
-- Use [Google style](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html) for docstrings.
+## Contributing Guidelines
 
-## Submitting Pull Requests
+### Pull Request Process
+1. **Fork** the repository
+2. **Create** a feature branch: `git checkout -b feature/amazing-feature`
+3. **Develop** using the workflows above
+4. **Test** thoroughly: `make run-ci`
+5. **Submit** a pull request with clear description
 
-1. Ensure your code adheres to the project's coding standards.
-2. Include tests for new functionality.
-3. Update documentation as necessary.
-4. Provide a clear description of the changes in your pull request.
+### Commit Message Format
+```
+feat: add new evaluation metric
+fix: resolve import error in experimental
+docs: update development guide
+test: add unit tests for metric base
+```
 
-Thank you for contributing to ragas!
+### Code Review Checklist
+- [ ] All tests pass (`make test`)
+- [ ] Code is formatted (`make format`)
+- [ ] Type checking passes (`make type`)
+- [ ] Documentation is updated
+- [ ] Appropriate tests are included
 
+## AI Agent Best Practices
 
-## Debugging Logs
+### Recommended Workflow for AI Agents
+1. **Understand the task**: Read relevant documentation and code
+2. **Plan the approach**: Identify which project(s) need changes
+3. **Use standardized commands**: Always prefer `make` targets
+4. **Test incrementally**: Use `make check` frequently during development
+5. **Validate thoroughly**: Run `make run-ci` before completing
 
-To view the debug logs for any module, you can set the following.
-```py
-import logging
+### Command Patterns for AI Agents
+```bash
+# Always start with understanding the current state
+make help
+ls -la  # Check current directory structure
+
+# For code changes
+make format  # After making changes
+make test    # Verify functionality
+
+# For investigation
+uv run pytest --collect-only  # See available tests
+uv run ruff check --no-fix    # Check issues without fixing
+```
 
-# Configure logging for the ragas._analytics module
-analytics_logger = logging.getLogger('ragas._analytics')
-analytics_logger.setLevel(logging.DEBUG)
+### File Modification Guidelines
+- **Prefer editing** existing files over creating new ones
+- **Use project conventions** (check similar files for patterns)
+- **Update tests** when modifying functionality
+- **Follow existing code style** (enforced by `make format`)
 
-# Create a console handler and set its level
-console_handler = logging.StreamHandler()
-console_handler.setLevel(logging.DEBUG)
+---
 
-# Create a formatter and add it to the handler
-formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
-console_handler.setFormatter(formatter)
+**Happy coding! 🚀**
 
-# Add the handler to the logger
-analytics_logger.addHandler(console_handler)
-```
\ No newline at end of file
+For additional context and instructions specific to AI assistants, see [CLAUDE.md](./CLAUDE.md).
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 9a866f310..f4df8acf9 100644
--- a/Makefile
+++ b/Makefile
@@ -3,101 +3,159 @@ GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
 # Optionally show commands being executed with V=1
 Q := $(if $(V),,@)
 
+# Common paths
+RAGAS_PATHS := ragas/src ragas/tests docs
+EXPERIMENTAL_PATH := experimental/ragas_experimental
+
 help: ## Show all Makefile targets
 	$(Q)grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
 
-.PHONY: format lint type style clean run-benchmarks format-experimental lint-experimental type-experimental process-experimental-notebooks
-format: ## Running code formatter for ragas
-	@echo "(isort) Ordering imports..."
-	$(Q)cd ragas && isort .
-	@echo "(black) Formatting codebase..."
-	$(Q)black --config ragas/pyproject.toml ragas/src ragas/tests docs
+# =============================================================================
+# SETUP & INSTALLATION
+# =============================================================================
+
+install: ## Install dependencies for both ragas and experimental
+	@echo "Installing dependencies..."
+	@echo "Installing ragas dependencies..."
+	$(Q)uv pip install -e "./ragas[dev]"
+	@echo "Installing experimental dependencies..."
+	$(Q)uv pip install -e "./experimental[dev]"
+
+setup: install ## Complete development environment setup
+	@echo "Development environment setup complete!"
+	@echo "Available commands: make help"
+
+# =============================================================================
+# CODE QUALITY
+# =============================================================================
+
+.PHONY: help install setup format type check clean test test-e2e benchmarks benchmarks-docker run-ci run-ci-fast run-ci-format-check run-ci-type run-ci-tests build-docs serve-docs process-experimental-notebooks
+format: ## Format and lint all code in the monorepo
+	@echo "Formatting and linting all code..."
+	@echo "(black) Formatting ragas..."
+	$(Q)uv run black --config ragas/pyproject.toml $(RAGAS_PATHS)
 	@echo "(black) Formatting stubs..."
-	$(Q)find ragas/src -name "*.pyi" ! -name "*_pb2*" -exec black --pyi --config ragas/pyproject.toml {} \;
-	@echo "(ruff) Running fix only..."
-	$(Q)ruff check ragas/src docs ragas/tests --fix-only
+	$(Q)find ragas/src -name "*.pyi" ! -name "*_pb2*" -exec uv run black --pyi --config ragas/pyproject.toml {} \;
+	@echo "(ruff) Auto-fixing ragas (includes import sorting and unused imports)..."
+	$(Q)uv run ruff check $(RAGAS_PATHS) --fix-only
+	@echo "(ruff) Final linting check for ragas..."
+	$(Q)uv run ruff check $(RAGAS_PATHS)
+	@echo "(black) Formatting experimental..."
+	$(Q)cd experimental && uv run black ragas_experimental
+	@echo "(ruff) Auto-fixing experimental (includes import sorting and unused imports)..."
+	$(Q)cd experimental && uv run ruff check ragas_experimental --fix-only
+	@echo "(ruff) Final linting check for experimental..."
+	$(Q)cd experimental && uv run ruff check ragas_experimental
+
+type: ## Type check all code in the monorepo
+	@echo "Type checking all code..."
+	@echo "(pyright) Typechecking ragas..."
+	$(Q)cd ragas && PYRIGHT_PYTHON_FORCE_VERSION=latest pyright src
+	@echo "(pyright) Typechecking experimental..."
+	# TODO: Fix experimental type checking for 0.3 release - currently has 96 type errors
+	# $(Q)PYRIGHT_PYTHON_FORCE_VERSION=latest pyright $(EXPERIMENTAL_PATH)
+	@echo "Experimental type checking temporarily disabled - TODO: fix for 0.3 release"
+
+check: format type ## Quick health check (format + type, no tests)
+	@echo "Code quality check complete!"
+
+# =============================================================================
+# TESTING
+# =============================================================================
+benchmarks: ## Run all benchmarks locally
+	@echo "Running all benchmarks..."
+	@echo "Running evaluation benchmarks..."
+	$(Q)cd $(GIT_ROOT)/ragas/tests/benchmarks && uv run python benchmark_eval.py
+	@echo "Running testset generation benchmarks..."
+	$(Q)cd $(GIT_ROOT)/ragas/tests/benchmarks && uv run python benchmark_testsetgen.py
+
+benchmarks-docker: ## Run benchmarks in docker
+	@echo "Running benchmarks in docker..."
+	$(Q)cd $(GIT_ROOT) || exit 1
+	docker buildx build --build-arg OPENAI_API_KEY=$(OPENAI_API_KEY) -t ragas-benchmark -f $(GIT_ROOT)/ragas/tests/benchmarks/Dockerfile .
+	docker inspect ragas-benchmark:latest | jq ".[0].Size" | numfmt --to=si
 
-format-experimental: ## Running code formatter for experimental
-	@echo "(black) Formatting experimental codebase..."
-	$(Q)cd experimental && black ragas_experimental
-	@echo "(ruff) Running fix only on experimental..."
-	$(Q)ruff check experimental/ragas_experimental --fix-only
+# =============================================================================
+# CI/BUILD
+# =============================================================================
 
-format-all: format format-experimental ## Format all code in the monorepo
+run-ci: run-ci-format-check run-ci-type run-ci-tests ## Run complete CI pipeline (mirrors GitHub CI exactly)
+	@echo "All CI checks passed!"
 
-lint: ## Running lint checker for ragas
-	@echo "(ruff) Linting ragas project..."
+run-ci-format-check: ## Run format check in dry-run mode (like GitHub CI)
+	@echo "Running format check (dry-run, like GitHub CI)..."
+	@echo "Checking ragas formatting..."
+	$(Q)black --check --config ragas/pyproject.toml ragas/src ragas/tests docs
 	$(Q)ruff check ragas/src docs ragas/tests
+	@echo "Checking experimental formatting..."
+	$(Q)cd experimental && black --check ragas_experimental && ruff check ragas_experimental
+
+run-ci-type: ## Run type checking (matches GitHub CI)
+	@echo "Running type checking (matches GitHub CI)..."
+	$(Q)$(MAKE) type
+
+run-ci-tests: ## Run all tests with GitHub CI options
+	@echo "Running unit tests with CI options..."
+	$(Q)cd ragas && __RAGAS_DEBUG_TRACKING=true RAGAS_DO_NOT_TRACK=true pytest --nbmake tests/unit --dist loadfile -n auto
+	@echo "Running experimental tests with CI options..."
+	$(Q)cd experimental && __RAGAS_DEBUG_TRACKING=true RAGAS_DO_NOT_TRACK=true pytest -v --tb=short
+
+run-ci-fast: ## Fast CI check for quick local validation (2-3 minutes)
+	@echo "Running fast CI check for quick feedback..."
+	@echo "Format check..."
+	$(Q)black --check --config ragas/pyproject.toml ragas/src ragas/tests docs
+	$(Q)ruff check ragas/src docs ragas/tests
+	$(Q)cd experimental && black --check ragas_experimental && ruff check ragas_experimental
+	@echo "Core unit tests (no nbmake for speed)..."
+	$(Q)cd ragas && pytest tests/unit --dist loadfile -n auto -x
+	@echo "Essential experimental tests..."
+	$(Q)cd experimental && pytest -v --tb=short -x
+	@echo "Fast CI check completed!"
 
-lint-experimental: ## Running lint checker for experimental
-	@echo "(ruff) Linting experimental project..."
-	$(Q)ruff check experimental/ragas_experimental
-
-lint-all: lint lint-experimental ## Lint all code in the monorepo
-
-type: ## Running type checker for ragas
-	@echo "(pyright) Typechecking ragas codebase..."
-	cd ragas && PYRIGHT_PYTHON_FORCE_VERSION=latest pyright src
-
-type-experimental: ## Running type checker for experimental
-	@echo "(pyright) Typechecking experimental codebase..."
-	PYRIGHT_PYTHON_FORCE_VERSION=latest pyright experimental/ragas_experimental
-
-type-all: type type-experimental ## Type check all code in the monorepo
 clean: ## Clean all generated files
 	@echo "Cleaning all generated files..."
 	$(Q)cd $(GIT_ROOT)/docs && $(MAKE) clean
-	$(Q)cd $(GIT_ROOT) || exit 1
 	$(Q)find . -type f -name '*.py[co]' -delete -o -type d -name __pycache__ -delete
 
-test: ## Run ragas tests
-	@echo "Running ragas tests..."
-	$(Q)cd ragas && pytest --nbmake tests/unit $(shell if [ -n "$(k)" ]; then echo "-k $(k)"; fi)
-
-test-e2e: ## Run ragas end2end tests
-	echo "running ragas end2end tests..."
-	$(Q)cd ragas && pytest --nbmake tests/e2e -s
+# =============================================================================
+# DOCUMENTATION
+# =============================================================================
 
-test-experimental: ## Run experimental tests
+test: ## Run all unit tests in the monorepo
+	@echo "Running all unit tests..."
+	@echo "Running ragas tests..."
+	$(Q)cd ragas && uv run pytest --nbmake tests/unit $(shell if [ -n "$(k)" ]; then echo "-k $(k)"; fi)
 	@echo "Running experimental tests..."
-	$(Q)cd experimental && pytest
-
-test-all: test test-experimental ## Run all tests
-
-run-ci: format lint type test ## Running all CI checks for ragas
-
-run-ci-experimental: format-experimental lint-experimental type-experimental test-experimental ## Running all CI checks for experimental
-
-run-ci-all: format-all lint-all type-all test-all ## Running all CI checks for both projects
-
-# Docs
-build-docsite-ragas: ## Build ragas documentation
-	@echo "convert ipynb notebooks to md files"
-	$(Q)python $(GIT_ROOT)/docs/ipynb_to_md.py
-	$(Q)mkdocs build
+	$(Q)cd experimental && uv run pytest
+
+test-e2e: ## Run all end-to-end tests
+	@echo "Running all end-to-end tests..."
+	@echo "Running ragas e2e tests..."
+	$(Q)cd ragas && uv run pytest --nbmake tests/e2e -s
+	@echo "Checking for experimental e2e tests..."
+	$(Q)if [ -d "experimental/tests/e2e" ]; then \
+		echo "Running experimental e2e tests..."; \
+		cd experimental && uv run pytest tests/e2e -s; \
+	else \
+		echo "No experimental e2e tests found."; \
+	fi
+
+# =============================================================================
+# BENCHMARKS
+# =============================================================================
 
 process-experimental-notebooks: ## Process experimental notebooks to markdown for MkDocs
 	@echo "Processing experimental notebooks..."
 	$(Q)python $(GIT_ROOT)/scripts/process_experimental_notebooks.py
 
-build-docsite-experimental: process-experimental-notebooks ## Build experimental documentation
+build-docs: process-experimental-notebooks ## Build all documentation
+	@echo "Building all documentation..."
+	@echo "Converting ipynb notebooks to md files..."
+	$(Q)python $(GIT_ROOT)/docs/ipynb_to_md.py
+	@echo "Building ragas documentation..."
+	$(Q)mkdocs build
 	@echo "Building experimental documentation..."
 	$(Q)cd experimental && nbdev_docs
 
-build-docsite: build-docsite-ragas ## Build all documentation
-
-serve-docsite: ## Build and serve documentation
+serve-docs: ## Build and serve documentation locally
 	$(Q)mkdocs serve --dirtyreload
-
-# Benchmarks
-run-benchmarks-eval: ## Run benchmarks for Evaluation
-	@echo "Running benchmarks for Evaluation..."
-	$(Q)cd $(GIT_ROOT)/ragas/tests/benchmarks && python benchmark_eval.py
-run-benchmarks-testset: ## Run benchmarks for TestSet Generation
-	@echo "Running benchmarks for TestSet Generation..."
-	$(Q)cd $(GIT_ROOT)/ragas/tests/benchmarks && python benchmark_testsetgen.py
-run-benchmarks-in-docker: ## Run benchmarks in docker
-	@echo "Running benchmarks in docker..."
-	$(Q)cd $(GIT_ROOT)
-	docker buildx build --build-arg OPENAI_API_KEY=$(OPENAI_API_KEY) -t ragas-benchmark -f $(GIT_ROOT)/ragas/tests/benchmarks/Dockerfile .
-	docker inspect ragas-benchmark:latest | jq ".[0].Size" | numfmt --to=si
diff --git a/docs/howtos/applications/cost.ipynb b/docs/howtos/applications/cost.ipynb
index 6642d08c9..237b6c730 100644
--- a/docs/howtos/applications/cost.ipynb
+++ b/docs/howtos/applications/cost.ipynb
@@ -26,7 +26,19 @@
    "cell_type": "code",
    "metadata": {},
    "outputs": [],
-   "source": "from langchain_openai.chat_models import ChatOpenAI\nfrom langchain_core.prompt_values import StringPromptValue\n# lets import a parser for OpenAI\nfrom ragas.cost import get_token_usage_for_openai\n\ngpt4o = ChatOpenAI(model=\"gpt-4o\")\np = StringPromptValue(text=\"hai there\")\nllm_result = gpt4o.generate_prompt([p])\n\nget_token_usage_for_openai(llm_result)"
+   "source": [
+    "from langchain_openai.chat_models import ChatOpenAI\n",
+    "from langchain_core.prompt_values import StringPromptValue\n",
+    "\n",
+    "# lets import a parser for OpenAI\n",
+    "from ragas.cost import get_token_usage_for_openai\n",
+    "\n",
+    "gpt4o = ChatOpenAI(model=\"gpt-4o\")\n",
+    "p = StringPromptValue(text=\"hai there\")\n",
+    "llm_result = gpt4o.generate_prompt([p])\n",
+    "\n",
+    "get_token_usage_for_openai(llm_result)"
+   ]
   },
   {
    "cell_type": "markdown",
diff --git a/docs/howtos/customizations/metrics/cost.ipynb b/docs/howtos/customizations/metrics/cost.ipynb
index 9d5664e50..d1730e294 100644
--- a/docs/howtos/customizations/metrics/cost.ipynb
+++ b/docs/howtos/customizations/metrics/cost.ipynb
@@ -37,7 +37,19 @@
    "cell_type": "code",
    "metadata": {},
    "outputs": [],
-   "source": "from langchain_openai.chat_models import ChatOpenAI\nfrom langchain_core.prompt_values import StringPromptValue\n# lets import a parser for OpenAI\nfrom ragas.cost import get_token_usage_for_openai\n\ngpt4o = ChatOpenAI(model=\"gpt-4o\")\np = StringPromptValue(text=\"hai there\")\nllm_result = gpt4o.generate_prompt([p])\n\nget_token_usage_for_openai(llm_result)"
+   "source": [
+    "from langchain_openai.chat_models import ChatOpenAI\n",
+    "from langchain_core.prompt_values import StringPromptValue\n",
+    "\n",
+    "# lets import a parser for OpenAI\n",
+    "from ragas.cost import get_token_usage_for_openai\n",
+    "\n",
+    "gpt4o = ChatOpenAI(model=\"gpt-4o\")\n",
+    "p = StringPromptValue(text=\"hai there\")\n",
+    "llm_result = gpt4o.generate_prompt([p])\n",
+    "\n",
+    "get_token_usage_for_openai(llm_result)"
+   ]
   },
   {
    "cell_type": "markdown",
diff --git a/docs/howtos/integrations/helicone.ipynb b/docs/howtos/integrations/helicone.ipynb
index 2eb70a39c..9bf751f1f 100644
--- a/docs/howtos/integrations/helicone.ipynb
+++ b/docs/howtos/integrations/helicone.ipynb
@@ -47,7 +47,29 @@
    "cell_type": "code",
    "metadata": {},
    "outputs": [],
-   "source": "import os\nfrom datasets import Dataset\nfrom ragas import evaluate\nfrom ragas.metrics import faithfulness, answer_relevancy, context_precision\nfrom ragas.integrations.helicone import helicone_config  # import helicone_config\n\n\n# Set up Helicone\nHELICONE_API_KEY = \"your_helicone_api_key_here\"  # Replace with your actual Helicone API key\nhelicone_config.api_key = HELICONE_API_KEY\nos.environ[\"OPENAI_API_KEY\"] = (\n    \"your_openai_api_key_here\"  # Replace with your actual OpenAI API key\n)\n\n# Verify Helicone API key is set\nif HELICONE_API_KEY == \"your_helicone_api_key_here\":\n    raise ValueError(\n        \"Please replace 'your_helicone_api_key_here' with your actual Helicone API key.\"\n    )"
+   "source": [
+    "import os\n",
+    "from datasets import Dataset\n",
+    "from ragas import evaluate\n",
+    "from ragas.metrics import faithfulness, answer_relevancy, context_precision\n",
+    "from ragas.integrations.helicone import helicone_config  # import helicone_config\n",
+    "\n",
+    "\n",
+    "# Set up Helicone\n",
+    "HELICONE_API_KEY = (\n",
+    "    \"your_helicone_api_key_here\"  # Replace with your actual Helicone API key\n",
+    ")\n",
+    "helicone_config.api_key = HELICONE_API_KEY\n",
+    "os.environ[\"OPENAI_API_KEY\"] = (\n",
+    "    \"your_openai_api_key_here\"  # Replace with your actual OpenAI API key\n",
+    ")\n",
+    "\n",
+    "# Verify Helicone API key is set\n",
+    "if HELICONE_API_KEY == \"your_helicone_api_key_here\":\n",
+    "    raise ValueError(\n",
+    "        \"Please replace 'your_helicone_api_key_here' with your actual Helicone API key.\"\n",
+    "    )"
+   ]
   },
   {
    "cell_type": "markdown",
diff --git a/experimental/old_nbs/_quarto.yml b/experimental/old_nbs/_quarto.yml
deleted file mode 100644
index 59a1e49cc..000000000
--- a/experimental/old_nbs/_quarto.yml
+++ /dev/null
@@ -1,22 +0,0 @@
-project:
-  type: website
-
-format:
-  html:
-    theme: cosmo
-    css: styles.css
-    toc: true
-    keep-md: true
-  commonmark: default
-
-website:
-  twitter-card: true
-  open-graph: true
-  repo-actions: [issue]
-  navbar:
-    background: primary
-    search: true
-  sidebar:
-    style: floating
-
-metadata-files: [nbdev.yml, sidebar.yml]
\ No newline at end of file
diff --git a/experimental/old_nbs/api/backends/factory.ipynb b/experimental/old_nbs/api/backends/factory.ipynb
deleted file mode 100644
index e5d230258..000000000
--- a/experimental/old_nbs/api/backends/factory.ipynb
+++ /dev/null
@@ -1,83 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Backend Factory\n",
-    "\n",
-    "> Factory class for creating the backends or mocked backends."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp backends.factory"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import typing as t\n",
-    "import os\n",
-    "\n",
-    "from ragas_experimental.backends.ragas_api_client import RagasApiClient"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class RagasApiClientFactory:\n",
-    "    \"\"\"Factory for creating Ragas API client instances.\"\"\"\n",
-    "\n",
-    "    @staticmethod\n",
-    "    def create(\n",
-    "        app_token: t.Optional[str] = None,\n",
-    "        base_url: t.Optional[str] = None,\n",
-    "    ) -> RagasApiClient:\n",
-    "        \"\"\"Create a Ragas API client.\n",
-    "\n",
-    "        Args:\n",
-    "            api_key: The API key for the Ragas API\n",
-    "            base_url: The base URL for the Ragas API\n",
-    "\n",
-    "        Returns:\n",
-    "            RagasApiClient: A Ragas API client instance\n",
-    "        \"\"\"\n",
-    "        if app_token is None:\n",
-    "            app_token = os.getenv(\"RAGAS_APP_TOKEN\")\n",
-    "\n",
-    "        if app_token is None:\n",
-    "            raise ValueError(\"RAGAS_API_KEY environment variable is not set\")\n",
-    "\n",
-    "        if base_url is None:\n",
-    "            base_url = os.getenv(\"RAGAS_API_BASE_URL\")\n",
-    "\n",
-    "        if base_url is None:\n",
-    "            base_url = \"https://api.dev.app.ragas.io\"\n",
-    "\n",
-    "        return RagasApiClient(app_token=app_token, base_url=base_url)\n"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/backends/ragas_api_client.ipynb b/experimental/old_nbs/api/backends/ragas_api_client.ipynb
deleted file mode 100644
index 53edb3295..000000000
--- a/experimental/old_nbs/api/backends/ragas_api_client.ipynb
+++ /dev/null
@@ -1,2438 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Ragas API Client\n",
-    "\n",
-    "> Python client to api.ragas.io"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp backends.ragas_api_client"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "RAGAS_APP_TOKEN = \"api_key\"\n",
-    "RAGAS_API_ENDPOINT = \"https://api.dev.app.ragas.io\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import httpx\n",
-    "import asyncio\n",
-    "import typing as t\n",
-    "from pydantic import BaseModel, Field\n",
-    "from fastcore.utils import patch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "from ragas_experimental.exceptions import (\n",
-    "    DatasetNotFoundError, DuplicateDatasetError,\n",
-    "    ProjectNotFoundError, DuplicateProjectError,\n",
-    "    ExperimentNotFoundError, DuplicateExperimentError\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class RagasApiClient():\n",
-    "    \"\"\"Client for the Ragas Relay API.\"\"\"\n",
-    "\n",
-    "    def __init__(self, base_url: str, app_token: t.Optional[str] = None):\n",
-    "        \"\"\"Initialize the Ragas API client.\n",
-    "        \n",
-    "        Args:\n",
-    "            base_url: Base URL for the API (e.g., \"http://localhost:8087\")\n",
-    "            app_token: API token for authentication\n",
-    "        \"\"\"\n",
-    "        if not app_token:\n",
-    "            raise ValueError(\"app_token must be provided\")\n",
-    "\n",
-    "        self.base_url = f\"{base_url.rstrip('/')}/api/v1\"\n",
-    "        self.app_token = app_token\n",
-    "\n",
-    "    async def _request(\n",
-    "        self,\n",
-    "        method: str,\n",
-    "        endpoint: str,\n",
-    "        params: t.Optional[t.Dict] = None,\n",
-    "        json_data: t.Optional[t.Dict] = None,\n",
-    "    ) -> t.Dict:\n",
-    "        \"\"\"Make a request to the API.\n",
-    "        \n",
-    "        Args:\n",
-    "            method: HTTP method (GET, POST, PATCH, DELETE)\n",
-    "            endpoint: API endpoint path\n",
-    "            params: Query parameters\n",
-    "            json_data: JSON request body\n",
-    "            \n",
-    "        Returns:\n",
-    "            The response data from the API\n",
-    "        \"\"\"\n",
-    "        url = f\"{self.base_url}/{endpoint.lstrip('/')}\"\n",
-    "        headers = {\"X-App-Token\": self.app_token}\n",
-    "\n",
-    "        async with httpx.AsyncClient() as client:\n",
-    "            response = await client.request(\n",
-    "                method=method, url=url, params=params, json=json_data, headers=headers\n",
-    "            )\n",
-    "\n",
-    "            data = response.json()\n",
-    "\n",
-    "            if response.status_code >= 400 or data.get(\"status\") == \"error\":\n",
-    "                error_msg = data.get(\"message\", \"Unknown error\")\n",
-    "                raise Exception(f\"API Error ({response.status_code}): {error_msg}\")\n",
-    "\n",
-    "            return data.get(\"data\")\n",
-    "\n",
-    "    #---- Resource Handlers ----\n",
-    "    async def _create_resource(self, path, data):\n",
-    "        \"\"\"Generic resource creation.\"\"\"\n",
-    "        return await self._request(\"POST\", path, json_data=data)\n",
-    "        \n",
-    "    async def _list_resources(self, path, **params):\n",
-    "        \"\"\"Generic resource listing.\"\"\"\n",
-    "        return await self._request(\"GET\", path, params=params)\n",
-    "        \n",
-    "    async def _get_resource(self, path):\n",
-    "        \"\"\"Generic resource retrieval.\"\"\"\n",
-    "        return await self._request(\"GET\", path)\n",
-    "        \n",
-    "    async def _update_resource(self, path, data):\n",
-    "        \"\"\"Generic resource update.\"\"\"\n",
-    "        return await self._request(\"PATCH\", path, json_data=data)\n",
-    "        \n",
-    "    async def _delete_resource(self, path):\n",
-    "        \"\"\"Generic resource deletion.\"\"\"\n",
-    "        return await self._request(\"DELETE\", path)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch\n",
-    "async def _get_resource_by_name(\n",
-    "    self: RagasApiClient,\n",
-    "    list_method: t.Callable,\n",
-    "    get_method: t.Callable,\n",
-    "    resource_name: str,\n",
-    "    name_field: str,\n",
-    "    not_found_error: t.Type[Exception],\n",
-    "    duplicate_error: t.Type[Exception],\n",
-    "    resource_type_name: str,\n",
-    "    **list_method_kwargs\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Generic method to get a resource by name.\n",
-    "    \n",
-    "    Args:\n",
-    "        list_method: Method to list resources\n",
-    "        get_method: Method to get a specific resource\n",
-    "        resource_name: Name to search for\n",
-    "        name_field: Field name that contains the resource name\n",
-    "        not_found_error: Exception to raise when resource is not found\n",
-    "        duplicate_error: Exception to raise when multiple resources are found\n",
-    "        resource_type_name: Human-readable name of the resource type\n",
-    "        **list_method_kwargs: Additional arguments to pass to list_method\n",
-    "        \n",
-    "    Returns:\n",
-    "        The resource information dictionary\n",
-    "        \n",
-    "    Raises:\n",
-    "        Exception: If resource is not found or multiple resources are found\n",
-    "    \"\"\"\n",
-    "    # Initial pagination parameters\n",
-    "    limit = 50  # Number of items per page\n",
-    "    offset = 0  # Starting position\n",
-    "    matching_resources = []\n",
-    "    \n",
-    "    while True:\n",
-    "        # Get a page of resources\n",
-    "        response = await list_method(\n",
-    "            limit=limit,\n",
-    "            offset=offset,\n",
-    "            **list_method_kwargs\n",
-    "        )\n",
-    "        \n",
-    "        items = response.get(\"items\", [])\n",
-    "        \n",
-    "        # If no items returned, we've reached the end\n",
-    "        if not items:\n",
-    "            break\n",
-    "            \n",
-    "        # Collect all resources with the matching name in this page\n",
-    "        for resource in items:\n",
-    "            if resource.get(name_field) == resource_name:\n",
-    "                matching_resources.append(resource)\n",
-    "        \n",
-    "        # Update offset for the next page\n",
-    "        offset += limit\n",
-    "        \n",
-    "        # If we've processed all items (less than limit returned), exit the loop\n",
-    "        if len(items) < limit:\n",
-    "            break\n",
-    "    \n",
-    "    # Check results\n",
-    "    if not matching_resources:\n",
-    "        context = list_method_kwargs.get(\"project_id\", \"\")\n",
-    "        context_msg = f\" in project {context}\" if context else \"\"\n",
-    "        raise not_found_error(\n",
-    "            f\"No {resource_type_name} with name '{resource_name}' found{context_msg}\"\n",
-    "        )\n",
-    "    \n",
-    "    if len(matching_resources) > 1:\n",
-    "        # Multiple matches found - construct an informative error message\n",
-    "        resource_ids = [r.get(\"id\") for r in matching_resources]\n",
-    "        context = list_method_kwargs.get(\"project_id\", \"\")\n",
-    "        context_msg = f\" in project {context}\" if context else \"\"\n",
-    "        \n",
-    "        raise duplicate_error(\n",
-    "            f\"Multiple {resource_type_name}s found with name '{resource_name}'{context_msg}. \"\n",
-    "            f\"{resource_type_name.capitalize()} IDs: {', '.join(resource_ids)}. \"\n",
-    "            f\"Please use get_{resource_type_name}() with a specific ID instead.\"\n",
-    "        )\n",
-    "    \n",
-    "    # Exactly one match found - retrieve full details\n",
-    "    if \"project_id\" in list_method_kwargs:\n",
-    "        return await get_method(list_method_kwargs[\"project_id\"], matching_resources[0].get(\"id\"))\n",
-    "    else:\n",
-    "        return await get_method(matching_resources[0].get(\"id\"))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Projects"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "#---- Projects ----\n",
-    "@patch\n",
-    "async def list_projects(\n",
-    "    self: RagasApiClient,\n",
-    "    ids: t.Optional[t.List[str]] = None,\n",
-    "    limit: int = 50,\n",
-    "    offset: int = 0,\n",
-    "    order_by: t.Optional[str] = None,\n",
-    "    sort_dir: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"List projects.\"\"\"\n",
-    "    params = {\"limit\": limit, \"offset\": offset}\n",
-    "\n",
-    "    if ids:\n",
-    "        params[\"ids\"] = \",\".join(ids)\n",
-    "\n",
-    "    if order_by:\n",
-    "        params[\"order_by\"] = order_by\n",
-    "\n",
-    "    if sort_dir:\n",
-    "        params[\"sort_dir\"] = sort_dir\n",
-    "\n",
-    "    return await self._list_resources(\"projects\", **params)\n",
-    "\n",
-    "@patch\n",
-    "async def get_project(self: RagasApiClient, project_id: str) -> t.Dict:\n",
-    "    \"\"\"Get a specific project by ID.\"\"\"\n",
-    "    # TODO: Need get project by title\n",
-    "    return await self._get_resource(f\"projects/{project_id}\")\n",
-    "\n",
-    "@patch\n",
-    "async def create_project(\n",
-    "    self: RagasApiClient, title: str, description: t.Optional[str] = None\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create a new project.\"\"\"\n",
-    "    data = {\"title\": title}\n",
-    "    if description:\n",
-    "        data[\"description\"] = description\n",
-    "    return await self._create_resource(\"projects\", data)\n",
-    "\n",
-    "@patch\n",
-    "async def update_project(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    title: t.Optional[str] = None,\n",
-    "    description: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Update an existing project.\"\"\"\n",
-    "    data = {}\n",
-    "    if title:\n",
-    "        data[\"title\"] = title\n",
-    "    if description:\n",
-    "        data[\"description\"] = description\n",
-    "    return await self._update_resource(f\"projects/{project_id}\", data)\n",
-    "\n",
-    "@patch\n",
-    "async def delete_project(self: RagasApiClient, project_id: str) -> None:\n",
-    "    \"\"\"Delete a project.\"\"\"\n",
-    "    await self._delete_resource(f\"projects/{project_id}\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Found 2 projects:\n",
-      "Error: string indices must be integers, not 'str'\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Initialize client with your authentication token\n",
-    "client = RagasApiClient(base_url=RAGAS_API_ENDPOINT, app_token=RAGAS_APP_TOKEN)\n",
-    "\n",
-    "# List projects\n",
-    "try:\n",
-    "    projects = await client.list_projects(limit=10)\n",
-    "    print(f\"Found {len(projects)} projects:\")\n",
-    "    for project in projects:\n",
-    "        print(f\"- {project['title']} (ID: {project['id']})\")\n",
-    "except Exception as e:\n",
-    "    print(f\"Error: {e}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': '26b0e577-8ff8-4014-bc7a-cfc410df3488',\n",
-       " 'title': 'test project',\n",
-       " 'description': 'test description',\n",
-       " 'created_at': '2025-04-10T00:12:34.606398+00:00',\n",
-       " 'updated_at': '2025-04-10T00:12:34.606398+00:00'}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await client.create_project(\"test project\", \"test description\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'items': [{'id': '1ef0843b-231f-4a2c-b64d-d39bcee9d830',\n",
-       "   'title': 'yann-lecun-wisdom',\n",
-       "   'description': 'Yann LeCun Wisdom',\n",
-       "   'created_at': '2025-04-15T03:27:08.962384+00:00',\n",
-       "   'updated_at': '2025-04-15T03:27:08.962384+00:00'},\n",
-       "  {'id': 'c2d788ec-a602-495b-8ddc-f457ce11b414',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-12T19:47:10.928422+00:00',\n",
-       "   'updated_at': '2025-04-12T19:47:10.928422+00:00'},\n",
-       "  {'id': '0d465f02-c88f-454e-9ff3-780a001e3e21',\n",
-       "   'title': 'test project',\n",
-       "   'description': 'test description',\n",
-       "   'created_at': '2025-04-12T19:46:36.221385+00:00',\n",
-       "   'updated_at': '2025-04-12T19:46:36.221385+00:00'},\n",
-       "  {'id': '2ae1434c-e700-44a7-9528-7c2f03cfb491',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-12T19:46:36.157122+00:00',\n",
-       "   'updated_at': '2025-04-12T19:46:36.157122+00:00'},\n",
-       "  {'id': 'adb45ec6-6902-4339-b05f-3b86fd256c7e',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-12T19:45:54.430913+00:00',\n",
-       "   'updated_at': '2025-04-12T19:45:54.430913+00:00'},\n",
-       "  {'id': '6f26bf5b-af4d-48b5-af2d-13d3e671bbbf',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-11T00:56:30.085249+00:00',\n",
-       "   'updated_at': '2025-04-11T00:56:30.085249+00:00'},\n",
-       "  {'id': '63e4fc0f-1a60-441b-bd71-f21ce8e35c7e',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-11T00:44:56.031721+00:00',\n",
-       "   'updated_at': '2025-04-11T00:44:56.031721+00:00'},\n",
-       "  {'id': 'db0bedd6-6cfa-4551-b1ab-af78fa82dca7',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-11T00:44:17.601598+00:00',\n",
-       "   'updated_at': '2025-04-11T00:44:17.601598+00:00'},\n",
-       "  {'id': '80c8ef9a-23d7-4a9f-a7d7-36c6472ab51e',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-11T00:42:37.287184+00:00',\n",
-       "   'updated_at': '2025-04-11T00:42:37.287184+00:00'},\n",
-       "  {'id': 'ae2a5a5c-3902-4ef6-af50-f2d8f27feea6',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-11T00:40:53.71528+00:00',\n",
-       "   'updated_at': '2025-04-11T00:40:53.71528+00:00'},\n",
-       "  {'id': '96618f8b-d3a1-4998-9a66-155f8f254512',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-11T00:31:21.410658+00:00',\n",
-       "   'updated_at': '2025-04-11T00:31:21.410658+00:00'},\n",
-       "  {'id': '4515aa23-cb4c-4c0a-b833-fefd0a30fdcc',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-11T00:27:49.977435+00:00',\n",
-       "   'updated_at': '2025-04-11T00:27:49.977435+00:00'},\n",
-       "  {'id': '138098a4-651e-4dca-b226-d70956b3e039',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-11T00:24:03.39505+00:00',\n",
-       "   'updated_at': '2025-04-11T00:24:03.39505+00:00'},\n",
-       "  {'id': 'bbe45632-3268-43a6-9694-b020b3f5226f',\n",
-       "   'title': 'Demo Project',\n",
-       "   'description': None,\n",
-       "   'created_at': '2025-04-10T22:41:14.663646+00:00',\n",
-       "   'updated_at': '2025-04-10T22:41:14.663646+00:00'},\n",
-       "  {'id': 'df764139-bac7-4aec-af24-5c6886189f84',\n",
-       "   'title': 'SuperMe-Demo',\n",
-       "   'description': 'SuperMe demo to show the team',\n",
-       "   'created_at': '2025-04-10T04:35:18.631257+00:00',\n",
-       "   'updated_at': '2025-04-10T04:35:18.631257+00:00'},\n",
-       "  {'id': 'a6ccabe0-7b8d-4866-98af-f167a36b94ff',\n",
-       "   'title': 'SuperMe',\n",
-       "   'description': 'SuperMe demo to show the team',\n",
-       "   'created_at': '2025-04-10T03:10:29.153622+00:00',\n",
-       "   'updated_at': '2025-04-10T03:10:29.153622+00:00'}],\n",
-       " 'pagination': {'offset': 0,\n",
-       "  'limit': 50,\n",
-       "  'total': 16,\n",
-       "  'order_by': 'created_at',\n",
-       "  'sort_dir': 'desc'}}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await client.list_projects()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "TEST_PROJECT_ID = \"a6ccabe0-7b8d-4866-98af-f167a36b94ff\"\n",
-    "project = await client.get_project(TEST_PROJECT_ID)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch\n",
-    "async def get_project_by_name(\n",
-    "    self: RagasApiClient, project_name: str\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Get a project by its name.\n",
-    "    \n",
-    "    Args:\n",
-    "        project_name: Name of the project to find\n",
-    "        \n",
-    "    Returns:\n",
-    "        The project information dictionary\n",
-    "        \n",
-    "    Raises:\n",
-    "        ProjectNotFoundError: If no project with the given name is found\n",
-    "        DuplicateProjectError: If multiple projects with the given name are found\n",
-    "    \"\"\"\n",
-    "    return await self._get_resource_by_name(\n",
-    "        list_method=self.list_projects,\n",
-    "        get_method=self.get_project,\n",
-    "        resource_name=project_name,\n",
-    "        name_field=\"title\",  # Projects use 'title' instead of 'name'\n",
-    "        not_found_error=ProjectNotFoundError,\n",
-    "        duplicate_error=DuplicateProjectError,\n",
-    "        resource_type_name=\"project\"\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': 'a6ccabe0-7b8d-4866-98af-f167a36b94ff',\n",
-       " 'title': 'SuperMe',\n",
-       " 'description': 'SuperMe demo to show the team',\n",
-       " 'created_at': '2025-04-10T03:10:29.153622+00:00',\n",
-       " 'updated_at': '2025-04-10T03:10:29.153622+00:00'}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await client.get_project_by_name(\"SuperMe\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Datasets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "#---- Datasets ----\n",
-    "@patch\n",
-    "async def list_datasets(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    limit: int = 50,\n",
-    "    offset: int = 0,\n",
-    "    order_by: t.Optional[str] = None,\n",
-    "    sort_dir: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"List datasets in a project.\"\"\"\n",
-    "    params = {\"limit\": limit, \"offset\": offset}\n",
-    "    if order_by:\n",
-    "        params[\"order_by\"] = order_by\n",
-    "    if sort_dir:\n",
-    "        params[\"sort_dir\"] = sort_dir\n",
-    "    return await self._list_resources(f\"projects/{project_id}/datasets\", **params)\n",
-    "\n",
-    "@patch\n",
-    "async def get_dataset(self: RagasApiClient, project_id: str, dataset_id: str) -> t.Dict:\n",
-    "    \"\"\"Get a specific dataset.\"\"\"\n",
-    "    return await self._get_resource(f\"projects/{project_id}/datasets/{dataset_id}\")\n",
-    "\n",
-    "@patch\n",
-    "async def create_dataset(\n",
-    "    self: RagasApiClient, project_id: str, name: str, description: t.Optional[str] = None\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create a new dataset in a project.\"\"\"\n",
-    "    data = {\"name\": name}\n",
-    "    if description:\n",
-    "        data[\"description\"] = description\n",
-    "    return await self._create_resource(f\"projects/{project_id}/datasets\", data)\n",
-    "\n",
-    "@patch\n",
-    "async def update_dataset(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    dataset_id: str,\n",
-    "    name: t.Optional[str] = None,\n",
-    "    description: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Update an existing dataset.\"\"\"\n",
-    "    data = {}\n",
-    "    if name:\n",
-    "        data[\"name\"] = name\n",
-    "    if description:\n",
-    "        data[\"description\"] = description\n",
-    "    return await self._update_resource(f\"projects/{project_id}/datasets/{dataset_id}\", data)\n",
-    "\n",
-    "@patch\n",
-    "async def delete_dataset(self: RagasApiClient, project_id: str, dataset_id: str) -> None:\n",
-    "    \"\"\"Delete a dataset.\"\"\"\n",
-    "    await self._delete_resource(f\"projects/{project_id}/datasets/{dataset_id}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "('1ef0843b-231f-4a2c-b64d-d39bcee9d830',\n",
-       " 'a6ccabe0-7b8d-4866-98af-f167a36b94ff')"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# check project ID\n",
-    "projects = await client.list_projects()\n",
-    "projects[\"items\"][0][\"id\"], TEST_PROJECT_ID"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "New dataset created: {'id': '2382037f-906c-45a0-9b9f-702d32903efd', 'name': 'New Dataset', 'description': 'This is a new dataset', 'updated_at': '2025-04-16T03:52:01.91574+00:00', 'created_at': '2025-04-16T03:52:01.91574+00:00', 'version_counter': 0, 'project_id': '1ef0843b-231f-4a2c-b64d-d39bcee9d830'}\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create a new dataset\n",
-    "new_dataset = await client.create_dataset(\n",
-    "    projects[\"items\"][0][\"id\"], \"New Dataset\", \"This is a new dataset\"\n",
-    ")\n",
-    "print(f\"New dataset created: {new_dataset}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Found 2 datasets\n"
-     ]
-    }
-   ],
-   "source": [
-    "# List datasets in the project\n",
-    "datasets = await client.list_datasets(projects[\"items\"][0][\"id\"])\n",
-    "print(f\"Found {len(datasets)} datasets\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Updated dataset: {'id': '8572180f-fddf-46c5-b943-e6ff6448eb01', 'name': 'Updated Dataset', 'description': 'This is an updated dataset', 'created_at': '2025-04-15T03:28:09.050125+00:00', 'updated_at': '2025-04-16T03:52:09.627448+00:00', 'version_counter': 0, 'project_id': '1ef0843b-231f-4a2c-b64d-d39bcee9d830'}\n"
-     ]
-    }
-   ],
-   "source": [
-    "updated_dataset = await client.update_dataset(\n",
-    "    projects[\"items\"][0][\"id\"],\n",
-    "    datasets[\"items\"][0][\"id\"],\n",
-    "    \"Updated Dataset\",\n",
-    "    \"This is an updated dataset\",\n",
-    ")\n",
-    "print(f\"Updated dataset: {updated_dataset}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Dataset deleted\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Delete the dataset\n",
-    "await client.delete_dataset(projects[\"items\"][0][\"id\"], datasets[\"items\"][0][\"id\"])\n",
-    "print(\"Dataset deleted\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "For the time being I've also added another option to get the dataset by name too"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch\n",
-    "async def get_dataset_by_name(\n",
-    "    self: RagasApiClient, project_id: str, dataset_name: str\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Get a dataset by its name.\n",
-    "    \n",
-    "    Args:\n",
-    "        project_id: ID of the project\n",
-    "        dataset_name: Name of the dataset to find\n",
-    "        \n",
-    "    Returns:\n",
-    "        The dataset information dictionary\n",
-    "        \n",
-    "    Raises:\n",
-    "        DatasetNotFoundError: If no dataset with the given name is found\n",
-    "        DuplicateDatasetError: If multiple datasets with the given name are found\n",
-    "    \"\"\"\n",
-    "    return await self._get_resource_by_name(\n",
-    "        list_method=self.list_datasets,\n",
-    "        get_method=self.get_dataset,\n",
-    "        resource_name=dataset_name,\n",
-    "        name_field=\"name\",\n",
-    "        not_found_error=DatasetNotFoundError,\n",
-    "        duplicate_error=DuplicateDatasetError,\n",
-    "        resource_type_name=\"dataset\",\n",
-    "        project_id=project_id\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "DuplicateDatasetError",
-     "evalue": "Multiple datasets found with name 'test' in project a6ccabe0-7b8d-4866-98af-f167a36b94ff. Dataset IDs: 9a48d5d1-531f-424f-b2d2-d8f9bcaeec1e, 483477a4-3d00-4010-a253-c92dee3bc092. Please use get_dataset() with a specific ID instead.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mDuplicateDatasetError\u001b[39m                     Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[19]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m client.get_dataset_by_name(project_id=TEST_PROJECT_ID, dataset_name=\u001b[33m\"\u001b[39m\u001b[33mtest\u001b[39m\u001b[33m\"\u001b[39m)\n",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[18]\u001b[39m\u001b[32m, line 18\u001b[39m, in \u001b[36mget_dataset_by_name\u001b[39m\u001b[34m(self, project_id, dataset_name)\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;129m@patch\u001b[39m\n\u001b[32m      2\u001b[39m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mget_dataset_by_name\u001b[39m(\n\u001b[32m      3\u001b[39m     \u001b[38;5;28mself\u001b[39m: RagasApiClient, project_id: \u001b[38;5;28mstr\u001b[39m, dataset_name: \u001b[38;5;28mstr\u001b[39m\n\u001b[32m      4\u001b[39m ) -> t.Dict:\n\u001b[32m      5\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Get a dataset by its name.\u001b[39;00m\n\u001b[32m      6\u001b[39m \n\u001b[32m      7\u001b[39m \u001b[33;03m    Args:\u001b[39;00m\n\u001b[32m   (...)\u001b[39m\u001b[32m     16\u001b[39m \u001b[33;03m        DuplicateDatasetError: If multiple datasets with the given name are found\u001b[39;00m\n\u001b[32m     17\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m18\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._get_resource_by_name(\n\u001b[32m     19\u001b[39m         list_method=\u001b[38;5;28mself\u001b[39m.list_datasets,\n\u001b[32m     20\u001b[39m         get_method=\u001b[38;5;28mself\u001b[39m.get_dataset,\n\u001b[32m     21\u001b[39m         resource_name=dataset_name,\n\u001b[32m     22\u001b[39m         name_field=\u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     23\u001b[39m         not_found_error=DatasetNotFoundError,\n\u001b[32m     24\u001b[39m         duplicate_error=DuplicateDatasetError,\n\u001b[32m     25\u001b[39m         resource_type_name=\u001b[33m\"\u001b[39m\u001b[33mdataset\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     26\u001b[39m         project_id=project_id\n\u001b[32m     27\u001b[39m     )\n",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[12]\u001b[39m\u001b[32m, line 76\u001b[39m, in \u001b[36m_get_resource_by_name\u001b[39m\u001b[34m(self, list_method, get_method, resource_name, name_field, not_found_error, duplicate_error, resource_type_name, **list_method_kwargs)\u001b[39m\n\u001b[32m     73\u001b[39m     context = list_method_kwargs.get(\u001b[33m\"\u001b[39m\u001b[33mproject_id\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m     74\u001b[39m     context_msg = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m in project \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcontext\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m context \u001b[38;5;28;01melse\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m76\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m duplicate_error(\n\u001b[32m     77\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mMultiple \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresource_type_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33ms found with name \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresource_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcontext_msg\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m     78\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresource_type_name.capitalize()\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m IDs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m.join(resource_ids)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m     79\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mPlease use get_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresource_type_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m() with a specific ID instead.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m     80\u001b[39m     )\n\u001b[32m     82\u001b[39m \u001b[38;5;66;03m# Exactly one match found - retrieve full details\u001b[39;00m\n\u001b[32m     83\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mproject_id\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m list_method_kwargs:\n",
-      "\u001b[31mDuplicateDatasetError\u001b[39m: Multiple datasets found with name 'test' in project a6ccabe0-7b8d-4866-98af-f167a36b94ff. Dataset IDs: 9a48d5d1-531f-424f-b2d2-d8f9bcaeec1e, 483477a4-3d00-4010-a253-c92dee3bc092. Please use get_dataset() with a specific ID instead."
-     ]
-    }
-   ],
-   "source": [
-    "await client.get_dataset_by_name(project_id=TEST_PROJECT_ID, dataset_name=\"test\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Experiments"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    " #| export\n",
-    "#---- Experiments ----\n",
-    "@patch\n",
-    "async def list_experiments(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    limit: int = 50,\n",
-    "    offset: int = 0,\n",
-    "    order_by: t.Optional[str] = None,\n",
-    "    sort_dir: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"List experiments in a project.\"\"\"\n",
-    "    params = {\"limit\": limit, \"offset\": offset}\n",
-    "    if order_by:\n",
-    "        params[\"order_by\"] = order_by\n",
-    "    if sort_dir:\n",
-    "        params[\"sort_dir\"] = sort_dir\n",
-    "    return await self._list_resources(f\"projects/{project_id}/experiments\", **params)\n",
-    "\n",
-    "@patch\n",
-    "async def get_experiment(self: RagasApiClient, project_id: str, experiment_id: str) -> t.Dict:\n",
-    "    \"\"\"Get a specific experiment.\"\"\"\n",
-    "    return await self._get_resource(f\"projects/{project_id}/experiments/{experiment_id}\")\n",
-    "\n",
-    "@patch\n",
-    "async def create_experiment(\n",
-    "    self: RagasApiClient, project_id: str, name: str, description: t.Optional[str] = None\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create a new experiment in a project.\"\"\"\n",
-    "    data = {\"name\": name}\n",
-    "    if description:\n",
-    "        data[\"description\"] = description\n",
-    "    return await self._create_resource(f\"projects/{project_id}/experiments\", data)\n",
-    "\n",
-    "@patch\n",
-    "async def update_experiment(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    experiment_id: str,\n",
-    "    name: t.Optional[str] = None,\n",
-    "    description: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Update an existing experiment.\"\"\"\n",
-    "    data = {}\n",
-    "    if name:\n",
-    "        data[\"name\"] = name\n",
-    "    if description:\n",
-    "        data[\"description\"] = description\n",
-    "    return await self._update_resource(f\"projects/{project_id}/experiments/{experiment_id}\", data)\n",
-    "\n",
-    "@patch\n",
-    "async def delete_experiment(self: RagasApiClient, project_id: str, experiment_id: str) -> None:\n",
-    "    \"\"\"Delete an experiment.\"\"\"\n",
-    "    await self._delete_resource(f\"projects/{project_id}/experiments/{experiment_id}\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "New experiment created: {'id': 'b575c5d1-6934-45c0-b67a-fc9a4d7bdba3', 'name': 'New Experiment', 'description': 'This is a new experiment', 'updated_at': '2025-04-10T00:12:39.955229+00:00', 'created_at': '2025-04-10T00:12:39.955229+00:00', 'version_counter': 0, 'project_id': '26b0e577-8ff8-4014-bc7a-cfc410df3488'}\n",
-      "Found 2 experiments\n",
-      "Experiment: {'id': 'b575c5d1-6934-45c0-b67a-fc9a4d7bdba3', 'name': 'New Experiment', 'description': 'This is a new experiment', 'created_at': '2025-04-10T00:12:39.955229+00:00', 'updated_at': '2025-04-10T00:12:39.955229+00:00', 'version_counter': 0, 'project_id': '26b0e577-8ff8-4014-bc7a-cfc410df3488'}\n",
-      "Updated experiment: {'id': 'b575c5d1-6934-45c0-b67a-fc9a4d7bdba3', 'name': 'Updated Experiment', 'description': 'This is an updated experiment', 'created_at': '2025-04-10T00:12:39.955229+00:00', 'updated_at': '2025-04-10T00:12:41.676216+00:00', 'version_counter': 0, 'project_id': '26b0e577-8ff8-4014-bc7a-cfc410df3488'}\n",
-      "Experiment deleted\n"
-     ]
-    }
-   ],
-   "source": [
-    "# create a new experiment\n",
-    "new_experiment = await client.create_experiment(\n",
-    "    projects[\"items\"][0][\"id\"], \"New Experiment\", \"This is a new experiment\"\n",
-    ")\n",
-    "print(f\"New experiment created: {new_experiment}\")\n",
-    "# list experiments\n",
-    "experiments = await client.list_experiments(projects[\"items\"][0][\"id\"])\n",
-    "print(f\"Found {len(experiments)} experiments\")\n",
-    "# get a specific experiment\n",
-    "experiment = await client.get_experiment(\n",
-    "    projects[\"items\"][0][\"id\"], experiments[\"items\"][0][\"id\"]\n",
-    ")\n",
-    "print(f\"Experiment: {experiment}\")\n",
-    "# update an experiment\n",
-    "updated_experiment = await client.update_experiment(\n",
-    "    projects[\"items\"][0][\"id\"],\n",
-    "    experiments[\"items\"][0][\"id\"],\n",
-    "    \"Updated Experiment\",\n",
-    "    \"This is an updated experiment\",\n",
-    ")\n",
-    "print(f\"Updated experiment: {updated_experiment}\")\n",
-    "# delete an experiment\n",
-    "await client.delete_experiment(projects[\"items\"][0][\"id\"], experiments[\"items\"][0][\"id\"])\n",
-    "print(\"Experiment deleted\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'items': [{'id': '78fd6c58-7edf-4239-93d1-4f49185d8e49',\n",
-       "   'name': 'New Experiment',\n",
-       "   'description': 'This is a new experiment',\n",
-       "   'created_at': '2025-03-30T06:31:31.689269+00:00',\n",
-       "   'updated_at': '2025-03-30T06:31:31.689269+00:00',\n",
-       "   'project_id': 'e1b3f1e4-d344-48f4-a178-84e7e32e6ab6'},\n",
-       "  {'id': '7c695b58-7fc3-464c-a18b-a96e35f9684d',\n",
-       "   'name': 'New Experiment',\n",
-       "   'description': 'This is a new experiment',\n",
-       "   'created_at': '2025-04-09T17:03:44.340782+00:00',\n",
-       "   'updated_at': '2025-04-09T17:03:44.340782+00:00',\n",
-       "   'project_id': 'e1b3f1e4-d344-48f4-a178-84e7e32e6ab6'}],\n",
-       " 'pagination': {'offset': 0,\n",
-       "  'limit': 50,\n",
-       "  'total': 2,\n",
-       "  'order_by': 'created_at',\n",
-       "  'sort_dir': 'asc'}}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await client.list_experiments(TEST_PROJECT_ID)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch\n",
-    "async def get_experiment_by_name(\n",
-    "    self: RagasApiClient, project_id: str, experiment_name: str\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Get an experiment by its name.\n",
-    "    \n",
-    "    Args:\n",
-    "        project_id: ID of the project containing the experiment\n",
-    "        experiment_name: Name of the experiment to find\n",
-    "        \n",
-    "    Returns:\n",
-    "        The experiment information dictionary\n",
-    "        \n",
-    "    Raises:\n",
-    "        ExperimentNotFoundError: If no experiment with the given name is found\n",
-    "        DuplicateExperimentError: If multiple experiments with the given name are found\n",
-    "    \"\"\"\n",
-    "    return await self._get_resource_by_name(\n",
-    "        list_method=self.list_experiments,\n",
-    "        get_method=self.get_experiment,\n",
-    "        resource_name=experiment_name,\n",
-    "        name_field=\"name\",\n",
-    "        not_found_error=ExperimentNotFoundError,\n",
-    "        duplicate_error=DuplicateExperimentError,\n",
-    "        resource_type_name=\"experiment\",\n",
-    "        project_id=project_id\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "DuplicateExperimentError",
-     "evalue": "Multiple experiments found with name 'test' in project a6ccabe0-7b8d-4866-98af-f167a36b94ff. Experiment IDs: e1ae15aa-2e0e-40dd-902a-0f0e0fd4df69, 52428c79-afdf-468e-82dc-6ef82c5b71d2, 55e14ac3-0037-4909-898f-eee9533a6d3f, 9adfa008-b479-41cf-ba28-c860e01401ea, 233d28c8-6556-49c5-b146-1e001720c214, 6aed5143-3f60-4bf2-bcf2-ecfdb950e992. Please use get_experiment() with a specific ID instead.",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mDuplicateExperimentError\u001b[39m                  Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[23]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m client.get_experiment_by_name(TEST_PROJECT_ID, \u001b[33m\"\u001b[39m\u001b[33mtest\u001b[39m\u001b[33m\"\u001b[39m)\n",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[22]\u001b[39m\u001b[32m, line 19\u001b[39m, in \u001b[36mget_experiment_by_name\u001b[39m\u001b[34m(self, project_id, experiment_name)\u001b[39m\n\u001b[32m      2\u001b[39m \u001b[38;5;129m@patch\u001b[39m\n\u001b[32m      3\u001b[39m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mget_experiment_by_name\u001b[39m(\n\u001b[32m      4\u001b[39m     \u001b[38;5;28mself\u001b[39m: RagasApiClient, project_id: \u001b[38;5;28mstr\u001b[39m, experiment_name: \u001b[38;5;28mstr\u001b[39m\n\u001b[32m      5\u001b[39m ) -> t.Dict:\n\u001b[32m      6\u001b[39m \u001b[38;5;250m    \u001b[39m\u001b[33;03m\"\"\"Get an experiment by its name.\u001b[39;00m\n\u001b[32m      7\u001b[39m \n\u001b[32m      8\u001b[39m \u001b[33;03m    Args:\u001b[39;00m\n\u001b[32m   (...)\u001b[39m\u001b[32m     17\u001b[39m \u001b[33;03m        DuplicateExperimentError: If multiple experiments with the given name are found\u001b[39;00m\n\u001b[32m     18\u001b[39m \u001b[33;03m    \"\"\"\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m19\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._get_resource_by_name(\n\u001b[32m     20\u001b[39m         list_method=\u001b[38;5;28mself\u001b[39m.list_experiments,\n\u001b[32m     21\u001b[39m         get_method=\u001b[38;5;28mself\u001b[39m.get_experiment,\n\u001b[32m     22\u001b[39m         resource_name=experiment_name,\n\u001b[32m     23\u001b[39m         name_field=\u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     24\u001b[39m         not_found_error=ExperimentNotFoundError,\n\u001b[32m     25\u001b[39m         duplicate_error=DuplicateExperimentError,\n\u001b[32m     26\u001b[39m         resource_type_name=\u001b[33m\"\u001b[39m\u001b[33mexperiment\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     27\u001b[39m         project_id=project_id\n\u001b[32m     28\u001b[39m     )\n",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[12]\u001b[39m\u001b[32m, line 76\u001b[39m, in \u001b[36m_get_resource_by_name\u001b[39m\u001b[34m(self, list_method, get_method, resource_name, name_field, not_found_error, duplicate_error, resource_type_name, **list_method_kwargs)\u001b[39m\n\u001b[32m     73\u001b[39m     context = list_method_kwargs.get(\u001b[33m\"\u001b[39m\u001b[33mproject_id\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m     74\u001b[39m     context_msg = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33m in project \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcontext\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m context \u001b[38;5;28;01melse\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m---> \u001b[39m\u001b[32m76\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m duplicate_error(\n\u001b[32m     77\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mMultiple \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresource_type_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33ms found with name \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresource_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mcontext_msg\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m     78\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresource_type_name.capitalize()\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m IDs: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m, \u001b[39m\u001b[33m'\u001b[39m.join(resource_ids)\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m. \u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m     79\u001b[39m         \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mPlease use get_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresource_type_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m() with a specific ID instead.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m     80\u001b[39m     )\n\u001b[32m     82\u001b[39m \u001b[38;5;66;03m# Exactly one match found - retrieve full details\u001b[39;00m\n\u001b[32m     83\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[33m\"\u001b[39m\u001b[33mproject_id\u001b[39m\u001b[33m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m list_method_kwargs:\n",
-      "\u001b[31mDuplicateExperimentError\u001b[39m: Multiple experiments found with name 'test' in project a6ccabe0-7b8d-4866-98af-f167a36b94ff. Experiment IDs: e1ae15aa-2e0e-40dd-902a-0f0e0fd4df69, 52428c79-afdf-468e-82dc-6ef82c5b71d2, 55e14ac3-0037-4909-898f-eee9533a6d3f, 9adfa008-b479-41cf-ba28-c860e01401ea, 233d28c8-6556-49c5-b146-1e001720c214, 6aed5143-3f60-4bf2-bcf2-ecfdb950e992. Please use get_experiment() with a specific ID instead."
-     ]
-    }
-   ],
-   "source": [
-    "await client.get_experiment_by_name(TEST_PROJECT_ID, \"test\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Columns (for datasets)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "from ragas_experimental.typing import ColumnType"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "#---- Dataset Columns ----\n",
-    "@patch\n",
-    "async def list_dataset_columns(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    dataset_id: str,\n",
-    "    limit: int = 50,\n",
-    "    offset: int = 0,\n",
-    "    order_by: t.Optional[str] = None,\n",
-    "    sort_dir: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"List columns in a dataset.\"\"\"\n",
-    "    params = {\"limit\": limit, \"offset\": offset}\n",
-    "    if order_by:\n",
-    "        params[\"order_by\"] = order_by\n",
-    "    if sort_dir:\n",
-    "        params[\"sort_dir\"] = sort_dir\n",
-    "    return await self._list_resources(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/columns\", **params\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def get_dataset_column(\n",
-    "    self: RagasApiClient, project_id: str, dataset_id: str, column_id: str\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Get a specific column in a dataset.\"\"\"\n",
-    "    return await self._get_resource(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}\"\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def create_dataset_column(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    dataset_id: str,\n",
-    "    id: str,\n",
-    "    name: str,\n",
-    "    type: str,\n",
-    "    col_order: t.Optional[int] = None,\n",
-    "    settings: t.Optional[t.Dict] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create a new column in a dataset.\"\"\"\n",
-    "    data = {\"id\": id, \"name\": name, \"type\": type}\n",
-    "    if col_order is not None:\n",
-    "        data[\"col_order\"] = col_order\n",
-    "    if settings:\n",
-    "        data[\"settings\"] = settings\n",
-    "    return await self._create_resource(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/columns\", data\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def update_dataset_column(\n",
-    "    self: RagasApiClient, project_id: str, dataset_id: str, column_id: str, **column_data\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Update an existing column in a dataset.\"\"\"\n",
-    "    return await self._update_resource(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}\",\n",
-    "        column_data,\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def delete_dataset_column(\n",
-    "    self: RagasApiClient, project_id: str, dataset_id: str, column_id: str\n",
-    ") -> None:\n",
-    "    \"\"\"Delete a column from a dataset.\"\"\"\n",
-    "    await self._delete_resource(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}\"\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': 'cc6794e1-3505-4d5c-b403-ca7e55142bbc',\n",
-       " 'name': 'New Dataset for testing columns',\n",
-       " 'description': 'This is a new dataset for testing columns',\n",
-       " 'updated_at': '2025-04-16T18:05:53.249101+00:00',\n",
-       " 'created_at': '2025-04-16T18:05:53.249101+00:00',\n",
-       " 'version_counter': 0,\n",
-       " 'project_id': '3d9b529b-c23f-4e87-8a26-dd1923749aa7'}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "datasets = await client.create_dataset(\n",
-    "    projects[\"items\"][0][\"id\"],\n",
-    "    \"New Dataset for testing columns\",\n",
-    "    \"This is a new dataset for testing columns\",\n",
-    ")\n",
-    "datasets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': 'new_column_5',\n",
-       " 'name': 'New Column 5',\n",
-       " 'type': 'select',\n",
-       " 'settings': {'id': 'new_column_5',\n",
-       "  'name': 'New Column 5',\n",
-       "  'type': 'select',\n",
-       "  'width': 255,\n",
-       "  'options': [{'name': 'name', 'value': 'name'},\n",
-       "   {'name': 'age', 'value': 'age'},\n",
-       "   {'name': 'gender', 'value': 'gender'}],\n",
-       "  'isVisible': True,\n",
-       "  'isEditable': True},\n",
-       " 'created_at': '2025-04-16T18:11:14.305975+00:00',\n",
-       " 'updated_at': '2025-04-16T18:11:14.305975+00:00',\n",
-       " 'datatable_id': 'cc6794e1-3505-4d5c-b403-ca7e55142bbc'}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# add a new column to the dataset\n",
-    "new_column = await client.create_dataset_column(\n",
-    "    project_id=projects[\"items\"][0][\"id\"],\n",
-    "    dataset_id=datasets[\"id\"],\n",
-    "    id=\"new_column_5\",\n",
-    "    name=\"New Column 3\",\n",
-    "    type=ColumnType.SELECT.value,\n",
-    "    settings={\n",
-    "        \"width\": 255,\n",
-    "        \"isVisible\": True,\n",
-    "        \"isEditable\": True,\n",
-    "        \"options\": [\n",
-    "            {\"name\": \"name\", \"color\": \"hsl(200, 100%, 50%)\", \"value\": \"name\"},\n",
-    "            {\"name\": \"age\", \"color\": \"hsl(200, 100%, 50%)\", \"value\": \"age\"},\n",
-    "            {\"name\": \"gender\", \"color\": \"hsl(200, 100%, 50%)\", \"value\": \"gender\"},\n",
-    "        ]\n",
-    "    },\n",
-    ")\n",
-    "new_column"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'items': [{'id': 'dQ7hCb1AUfog',\n",
-       "   'name': 'tags_color_coded',\n",
-       "   'type': 'select',\n",
-       "   'settings': {'id': 'dQ7hCb1AUfog',\n",
-       "    'name': 'tags_color_coded',\n",
-       "    'type': 'select',\n",
-       "    'width': 255,\n",
-       "    'options': [{'name': 'red', 'color': 'hsl(0, 85%, 60%)', 'value': 'red'},\n",
-       "     {'name': 'green', 'color': 'hsl(30, 85%, 60%)', 'value': 'green'},\n",
-       "     {'name': 'blue', 'color': 'hsl(45, 85%, 60%)', 'value': 'blue'}],\n",
-       "    'isVisible': True,\n",
-       "    'isEditable': True},\n",
-       "   'created_at': '2025-04-16T19:00:39.936764+00:00',\n",
-       "   'updated_at': '2025-04-16T19:00:39.936764+00:00',\n",
-       "   'datatable_id': '271b8bc7-2d04-43b8-8960-ce20365f546b'},\n",
-       "  {'id': 'eCAiMBRqm0Uc',\n",
-       "   'name': 'id',\n",
-       "   'type': 'number',\n",
-       "   'settings': {'id': 'eCAiMBRqm0Uc',\n",
-       "    'name': 'id',\n",
-       "    'type': 'number',\n",
-       "    'width': 255,\n",
-       "    'isVisible': True,\n",
-       "    'isEditable': True},\n",
-       "   'created_at': '2025-04-16T19:00:39.971857+00:00',\n",
-       "   'updated_at': '2025-04-16T19:00:39.971857+00:00',\n",
-       "   'datatable_id': '271b8bc7-2d04-43b8-8960-ce20365f546b'},\n",
-       "  {'id': 'fRegl7Ucx3Sp',\n",
-       "   'name': 'description',\n",
-       "   'type': 'longText',\n",
-       "   'settings': {'id': 'fRegl7Ucx3Sp',\n",
-       "    'name': 'description',\n",
-       "    'type': 'longText',\n",
-       "    'width': 255,\n",
-       "    'isVisible': True,\n",
-       "    'isEditable': True,\n",
-       "    'max_length': 1000},\n",
-       "   'created_at': '2025-04-16T19:00:40.055047+00:00',\n",
-       "   'updated_at': '2025-04-16T19:00:40.055047+00:00',\n",
-       "   'datatable_id': '271b8bc7-2d04-43b8-8960-ce20365f546b'},\n",
-       "  {'id': 'foebrzYhiu9x',\n",
-       "   'name': 'tags',\n",
-       "   'type': 'select',\n",
-       "   'settings': {'id': 'foebrzYhiu9x',\n",
-       "    'name': 'tags',\n",
-       "    'type': 'select',\n",
-       "    'width': 255,\n",
-       "    'options': [{'name': 'tag1', 'color': 'hsl(0, 85%, 60%)', 'value': 'tag1'},\n",
-       "     {'name': 'tag2', 'color': 'hsl(30, 85%, 60%)', 'value': 'tag2'},\n",
-       "     {'name': 'tag3', 'color': 'hsl(45, 85%, 60%)', 'value': 'tag3'}],\n",
-       "    'isVisible': True,\n",
-       "    'isEditable': True},\n",
-       "   'created_at': '2025-04-16T19:00:40.084457+00:00',\n",
-       "   'updated_at': '2025-04-16T19:00:40.084457+00:00',\n",
-       "   'datatable_id': '271b8bc7-2d04-43b8-8960-ce20365f546b'},\n",
-       "  {'id': 'ciAzRUhKct9c',\n",
-       "   'name': 'name',\n",
-       "   'type': 'longText',\n",
-       "   'settings': {'id': 'ciAzRUhKct9c',\n",
-       "    'name': 'name',\n",
-       "    'type': 'longText',\n",
-       "    'width': 255,\n",
-       "    'isVisible': True,\n",
-       "    'isEditable': True,\n",
-       "    'max_length': 1000},\n",
-       "   'created_at': '2025-04-16T19:00:40.232989+00:00',\n",
-       "   'updated_at': '2025-04-16T19:00:40.232989+00:00',\n",
-       "   'datatable_id': '271b8bc7-2d04-43b8-8960-ce20365f546b'},\n",
-       "  {'id': 'iAW5muBh9mc251p8-LqKz',\n",
-       "   'name': 'url',\n",
-       "   'type': 'url',\n",
-       "   'settings': {'id': 'iAW5muBh9mc251p8-LqKz',\n",
-       "    'name': 'url',\n",
-       "    'type': 'url',\n",
-       "    'width': 192,\n",
-       "    'position': 5,\n",
-       "    'isVisible': True,\n",
-       "    'isEditable': True},\n",
-       "   'created_at': '2025-04-16T20:13:09.418698+00:00',\n",
-       "   'updated_at': '2025-04-16T20:13:16.914367+00:00',\n",
-       "   'datatable_id': '271b8bc7-2d04-43b8-8960-ce20365f546b'}],\n",
-       " 'pagination': {'offset': 0,\n",
-       "  'limit': 50,\n",
-       "  'total': 6,\n",
-       "  'order_by': 'created_at',\n",
-       "  'sort_dir': 'asc'}}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await client.list_dataset_columns(projects[\"items\"][0][\"id\"], \"271b8bc7-2d04-43b8-8960-ce20365f546b\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': 'new_column_3',\n",
-       " 'name': 'New Column 3',\n",
-       " 'type': 'text',\n",
-       " 'settings': {'id': 'new_column_3',\n",
-       "  'name': 'New Column 3',\n",
-       "  'type': 'text',\n",
-       "  'max_length': 255,\n",
-       "  'is_required': True},\n",
-       " 'created_at': '2025-04-10T02:22:07.300895+00:00',\n",
-       " 'updated_at': '2025-04-10T02:22:07.300895+00:00',\n",
-       " 'datatable_id': 'ebc3dd3e-f88b-4f8b-8c72-6cfcae0a0cd4'}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "col3 = await client.get_dataset_column(\n",
-    "    projects[\"items\"][0][\"id\"], datasets[\"id\"], \"new_column_3\"\n",
-    ")\n",
-    "col3"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': 'new_column_3',\n",
-       " 'name': 'New Column 3 Updated',\n",
-       " 'type': 'number',\n",
-       " 'settings': {'id': 'new_column_3',\n",
-       "  'name': 'New Column 3',\n",
-       "  'type': 'text',\n",
-       "  'max_length': 255,\n",
-       "  'is_required': True},\n",
-       " 'created_at': '2025-04-10T02:22:07.300895+00:00',\n",
-       " 'updated_at': '2025-04-10T02:22:11.116882+00:00',\n",
-       " 'datatable_id': 'ebc3dd3e-f88b-4f8b-8c72-6cfcae0a0cd4'}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await client.update_dataset_column(\n",
-    "    projects[\"items\"][0][\"id\"],\n",
-    "    datasets[\"id\"],\n",
-    "    \"new_column_3\",\n",
-    "    name=\"New Column 3 Updated\",\n",
-    "    type=ColumnType.NUMBER.value,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "await client.delete_dataset_column(\n",
-    "    projects[\"items\"][0][\"id\"], datasets[\"id\"], \"new_column_3\"\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Rows (for datasets)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "#---- Dataset Rows ----\n",
-    "@patch\n",
-    "async def list_dataset_rows(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    dataset_id: str,\n",
-    "    limit: int = 50,\n",
-    "    offset: int = 0,\n",
-    "    order_by: t.Optional[str] = None,\n",
-    "    sort_dir: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"List rows in a dataset.\"\"\"\n",
-    "    params = {\"limit\": limit, \"offset\": offset}\n",
-    "    if order_by:\n",
-    "        params[\"order_by\"] = order_by\n",
-    "    if sort_dir:\n",
-    "        params[\"sort_dir\"] = sort_dir\n",
-    "    return await self._list_resources(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/rows\", **params\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def get_dataset_row(\n",
-    "    self: RagasApiClient, project_id: str, dataset_id: str, row_id: str\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Get a specific row in a dataset.\"\"\"\n",
-    "    return await self._get_resource(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/rows/{row_id}\"\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def create_dataset_row(\n",
-    "    self: RagasApiClient, project_id: str, dataset_id: str, id: str, data: t.Dict\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create a new row in a dataset.\"\"\"\n",
-    "    row_data = {\"id\": id, \"data\": data}\n",
-    "    return await self._create_resource(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/rows\", row_data\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def update_dataset_row(\n",
-    "    self: RagasApiClient, project_id: str, dataset_id: str, row_id: str, data: t.Dict\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Update an existing row in a dataset.\"\"\"\n",
-    "    row_data = {\"data\": data}\n",
-    "    return await self._update_resource(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/rows/{row_id}\",\n",
-    "        row_data,\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def delete_dataset_row(\n",
-    "    self: RagasApiClient, project_id: str, dataset_id: str, row_id: str\n",
-    ") -> None:\n",
-    "    \"\"\"Delete a row from a dataset.\"\"\"\n",
-    "    await self._delete_resource(\n",
-    "        f\"projects/{project_id}/datasets/{dataset_id}/rows/{row_id}\"\n",
-    "    )\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'3374b891-8398-41bd-8f81-2867759df294'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "datasets[\"id\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': '',\n",
-       " 'data': {'id': '', 'new_column_3': 'name'},\n",
-       " 'created_at': '2025-04-16T17:46:39.100525+00:00',\n",
-       " 'updated_at': '2025-04-16T17:46:39.100525+00:00',\n",
-       " 'datatable_id': '3374b891-8398-41bd-8f81-2867759df294'}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await client.create_dataset_row(\n",
-    "    project_id=projects[\"items\"][0][\"id\"],\n",
-    "    dataset_id=datasets[\"id\"],\n",
-    "    id=\"\",\n",
-    "    data={\"new_column_3\": \"name\"},\n",
-    ")\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Get a Dataset Visualized - Created From UI\n",
-    "Lets Create a new dataset and add columns and rows via the endpoint to see how it behaves"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'https://dev.app.ragas.io/dashboard/projects/e1b3f1e4-d344-48f4-a178-84e7e32e6ab6/datasets/dbccf6aa-b923-47ed-8e97-bd46f2f2cee8'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# generate a dataset\n",
-    "dataset = await client.create_dataset(\n",
-    "    project_id=TEST_PROJECT_ID,\n",
-    "    name=\"Dataset Visualized from UI\",\n",
-    "    description=\"This is a dataset created from the UI\",\n",
-    ")\n",
-    "\n",
-    "# show url\n",
-    "WEB_ENDPOINT = \"https://dev.app.ragas.io\"\n",
-    "url = f\"{WEB_ENDPOINT}/dashboard/projects/{TEST_PROJECT_ID}/datasets/{dataset['id']}\"\n",
-    "url"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# list columns\n",
-    "columns = await client.list_dataset_columns(TEST_PROJECT_ID, dataset[\"id\"])\n",
-    "# list rows\n",
-    "rows = await client.list_dataset_rows(TEST_PROJECT_ID, dataset[\"id\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'items': [],\n",
-       " 'pagination': {'offset': 0,\n",
-       "  'limit': 50,\n",
-       "  'total': 0,\n",
-       "  'order_by': 'created_at',\n",
-       "  'sort_dir': 'asc'}}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "columns\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'items': [],\n",
-       " 'pagination': {'offset': 0,\n",
-       "  'limit': 50,\n",
-       "  'total': 0,\n",
-       "  'order_by': 'created_at',\n",
-       "  'sort_dir': 'asc'}}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "rows"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Create a Dataset from data\n",
-    "\n",
-    "we want to be able to use the API with python data like this `t.List[t.Dict]`.\n",
-    "```py\n",
-    "# how we want the data to look\n",
-    "data = [\n",
-    "    {\n",
-    "        \"id\": \"1\",\n",
-    "        \"query\": \"What is the capital of France?\",\n",
-    "        \"persona\": \"John\",\n",
-    "        \"ground_truth\": \"Paris\",\n",
-    "    },\n",
-    "    {\n",
-    "        \"id\": \"2\",\n",
-    "        \"query\": \"What is the capital of Germany?\",\n",
-    "        \"persona\": \"Jane\",\n",
-    "        \"ground_truth\": \"Berlin\",\n",
-    "    },\n",
-    "    {\n",
-    "        \"id\": \"3\",\n",
-    "        \"query\": \"What is the capital of Italy?\",\n",
-    "        \"persona\": \"John\",\n",
-    "        \"ground_truth\": \"Rome\",\n",
-    "    },\n",
-    "]\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['number', 'text', 'longText', 'select', 'date', 'multiSelect', 'checkbox', 'custom']\n"
-     ]
-    }
-   ],
-   "source": [
-    "# print out column types\n",
-    "print([col.value for col in ColumnType])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# it should be able to handle simple python dicts\n",
-    "data = [\n",
-    "    {\n",
-    "        \"id\": \"1\",\n",
-    "        \"query\": \"What is the capital of France?\",\n",
-    "        \"persona\": \"John\",\n",
-    "        \"ground_truth\": \"Paris\",\n",
-    "    },\n",
-    "    {\n",
-    "        \"id\": \"2\",\n",
-    "        \"query\": \"What is the capital of Germany?\",\n",
-    "        \"persona\": \"Jane\",\n",
-    "        \"ground_truth\": \"Berlin\",\n",
-    "    },\n",
-    "]"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "There can be 2 ways to pass in data\n",
-    "\n",
-    "1. Data can come as either as simple dicts\n",
-    "\n",
-    "```py\n",
-    "data = [\n",
-    "    {\"column_1\": \"value\", \"column_2\": \"value\"}\n",
-    "]\n",
-    "```\n",
-    "\n",
-    "2. or if you want to give more settings\n",
-    "\n",
-    "```py\n",
-    "data = [\n",
-    "    {\n",
-    "        \"column_1\": {\"data\": \"value\", \"type\": ColumnType.text},\n",
-    "        \"column_2\": {\"data\": \"value\", \"type\": ColumnType.number},\n",
-    "    }\n",
-    "]\n",
-    "```\n",
-    "\n",
-    "3. after that you will have to pass a list `Column` and `Row` to add it."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# test data\n",
-    "test_data_columns = [\n",
-    "    {\"name\": \"id\", \"type\": ColumnType.NUMBER.value},\n",
-    "    {\"name\": \"query\", \"type\": ColumnType.TEXT.value},\n",
-    "    {\"name\": \"persona\", \"type\": ColumnType.TEXT.value},\n",
-    "    {\"name\": \"ground_truth\", \"type\": ColumnType.TEXT.value},\n",
-    "]\n",
-    "\n",
-    "test_data_rows = [{\n",
-    "    \"id\": \"1\",\n",
-    "    \"query\": \"What is the capital of France?\",\n",
-    "    \"persona\": \"John\",\n",
-    "    \"ground_truth\": \"Paris\",\n",
-    "}, {\n",
-    "    \"id\": \"2\",\n",
-    "    \"query\": \"What is the capital of Germany?\",\n",
-    "    \"persona\": \"Jane\",\n",
-    "    \"ground_truth\": \"Berlin\",\n",
-    "}, {\n",
-    "    \"id\": \"3\",\n",
-    "    \"query\": \"What is the capital of Italy?\",\n",
-    "    \"persona\": \"John\",\n",
-    "    \"ground_truth\": \"Rome\",\n",
-    "}]\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import uuid\n",
-    "import string"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "def create_nano_id(size=12):\n",
-    "    # Define characters to use (alphanumeric)\n",
-    "    alphabet = string.ascii_letters + string.digits\n",
-    "    \n",
-    "    # Generate UUID and convert to int\n",
-    "    uuid_int = uuid.uuid4().int\n",
-    "    \n",
-    "    # Convert to base62\n",
-    "    result = \"\"\n",
-    "    while uuid_int:\n",
-    "        uuid_int, remainder = divmod(uuid_int, len(alphabet))\n",
-    "        result = alphabet[remainder] + result\n",
-    "    \n",
-    "    # Pad if necessary and return desired length\n",
-    "    return result[:size]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'Anvz5k9geU7T'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Usage\n",
-    "nano_id = create_nano_id()  # e.g., \"8dK9cNw3mP5x\"\n",
-    "nano_id"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import uuid\n",
-    "import string"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "def create_nano_id(size=12):\n",
-    "    # Define characters to use (alphanumeric)\n",
-    "    alphabet = string.ascii_letters + string.digits\n",
-    "    \n",
-    "    # Generate UUID and convert to int\n",
-    "    uuid_int = uuid.uuid4().int\n",
-    "    \n",
-    "    # Convert to base62\n",
-    "    result = \"\"\n",
-    "    while uuid_int:\n",
-    "        uuid_int, remainder = divmod(uuid_int, len(alphabet))\n",
-    "        result = alphabet[remainder] + result\n",
-    "    \n",
-    "    # Pad if necessary and return desired length\n",
-    "    return result[:size]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'Anvz5k9geU7T'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Usage\n",
-    "nano_id = create_nano_id()  # e.g., \"8dK9cNw3mP5x\"\n",
-    "nano_id"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "# Default settings for columns\n",
-    "DEFAULT_SETTINGS = {\n",
-    "    \"is_required\": False,\n",
-    "    \"max_length\": 1000\n",
-    "}\n",
-    "\n",
-    "# Model definitions\n",
-    "class Column(BaseModel):\n",
-    "    id: str = Field(default_factory=create_nano_id)\n",
-    "    name: str = Field(...)\n",
-    "    type: str = Field(...)\n",
-    "    settings: t.Dict = Field(default_factory=lambda: DEFAULT_SETTINGS.copy())\n",
-    "    col_order: t.Optional[int] = Field(default=None)\n",
-    "\n",
-    "class RowCell(BaseModel):\n",
-    "    data: t.Any = Field(...)\n",
-    "    column_id: str = Field(...)\n",
-    "\n",
-    "class Row(BaseModel):\n",
-    "    id: str = Field(default_factory=create_nano_id)\n",
-    "    data: t.List[RowCell] = Field(...)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "#---- Resource With Data Helper Methods ----\n",
-    "@patch\n",
-    "async def _create_with_data(\n",
-    "    self: RagasApiClient,\n",
-    "    resource_type: str,\n",
-    "    project_id: str,\n",
-    "    name: str, \n",
-    "    description: str,\n",
-    "    columns: t.List[Column],\n",
-    "    rows: t.List[Row],\n",
-    "    batch_size: int = 50\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Generic method to create a resource with columns and rows.\n",
-    "    \n",
-    "    Args:\n",
-    "        resource_type: Type of resource (\"dataset\" or \"experiment\")\n",
-    "        project_id: Project ID\n",
-    "        name: Resource name\n",
-    "        description: Resource description\n",
-    "        columns: List of column definitions\n",
-    "        rows: List of row data\n",
-    "        batch_size: Number of operations to perform concurrently\n",
-    "        \n",
-    "    Returns:\n",
-    "        The created resource\n",
-    "    \"\"\"\n",
-    "    # Select appropriate methods based on resource type\n",
-    "    if resource_type == \"dataset\":\n",
-    "        create_fn = self.create_dataset\n",
-    "        create_col_fn = self.create_dataset_column\n",
-    "        create_row_fn = self.create_dataset_row\n",
-    "        delete_fn = self.delete_dataset\n",
-    "        id_key = \"dataset_id\"\n",
-    "    elif resource_type == \"experiment\":\n",
-    "        create_fn = self.create_experiment\n",
-    "        create_col_fn = self.create_experiment_column\n",
-    "        create_row_fn = self.create_experiment_row\n",
-    "        delete_fn = self.delete_experiment\n",
-    "        id_key = \"experiment_id\"\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported resource type: {resource_type}\")\n",
-    "        \n",
-    "    try:\n",
-    "        # Create the resource\n",
-    "        resource = await create_fn(project_id, name, description)\n",
-    "        \n",
-    "        # Process columns in batches\n",
-    "        for i in range(0, len(columns), batch_size):\n",
-    "            batch = columns[i:i+batch_size]\n",
-    "            col_tasks = []\n",
-    "            \n",
-    "            for col in batch:\n",
-    "                params = {\n",
-    "                    \"project_id\": project_id,\n",
-    "                    id_key: resource[\"id\"], # dataset_id here\n",
-    "                    \"id\": col.id,\n",
-    "                    \"name\": col.name,\n",
-    "                    \"type\": col.type,\n",
-    "                    \"settings\": col.settings\n",
-    "                }\n",
-    "                if col.col_order is not None:\n",
-    "                    params[\"col_order\"] = col.col_order\n",
-    "                \n",
-    "                col_tasks.append(create_col_fn(**params))\n",
-    "            \n",
-    "            await asyncio.gather(*col_tasks)\n",
-    "            \n",
-    "        # Process rows in batches\n",
-    "        for i in range(0, len(rows), batch_size):\n",
-    "            batch = rows[i:i+batch_size]\n",
-    "            row_tasks = []\n",
-    "            \n",
-    "            for row in batch:\n",
-    "                row_data = {cell.column_id: cell.data for cell in row.data}\n",
-    "                row_tasks.append(\n",
-    "                    create_row_fn(\n",
-    "                        project_id=project_id,\n",
-    "                        **{id_key: resource[\"id\"]},\n",
-    "                        id=row.id,\n",
-    "                        data=row_data\n",
-    "                    )\n",
-    "                )\n",
-    "            \n",
-    "            await asyncio.gather(*row_tasks)\n",
-    "            \n",
-    "        return resource\n",
-    "        \n",
-    "    except Exception as e:\n",
-    "        # Clean up on error\n",
-    "        if 'resource' in locals():\n",
-    "            try:\n",
-    "                await delete_fn(project_id, resource[\"id\"])\n",
-    "            except:\n",
-    "                pass  # Ignore cleanup errors\n",
-    "        raise e\n",
-    "\n",
-    "@patch\n",
-    "async def create_dataset_with_data(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    name: str,\n",
-    "    description: str,\n",
-    "    columns: t.List[Column],\n",
-    "    rows: t.List[Row],\n",
-    "    batch_size: int = 50\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create a dataset with columns and rows.\n",
-    "    \n",
-    "    This method creates a dataset and populates it with columns and rows in an\n",
-    "    optimized way using concurrent requests.\n",
-    "    \n",
-    "    Args:\n",
-    "        project_id: Project ID\n",
-    "        name: Dataset name\n",
-    "        description: Dataset description\n",
-    "        columns: List of column definitions\n",
-    "        rows: List of row data\n",
-    "        batch_size: Number of operations to perform concurrently\n",
-    "        \n",
-    "    Returns:\n",
-    "        The created dataset\n",
-    "    \"\"\"\n",
-    "    return await self._create_with_data(\n",
-    "        \"dataset\", project_id, name, description, columns, rows, batch_size\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now lets test this."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Created dataset with ID: 5e7912f4-6a65-4d0c-bf79-0fab9ddda40c\n",
-      "Created 4 columns\n",
-      "Created 3 rows\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create Column objects\n",
-    "column_objects = []\n",
-    "for col in test_data_columns:\n",
-    "    column_objects.append(Column(\n",
-    "        name=col[\"name\"],\n",
-    "        type=col[\"type\"]\n",
-    "        # id and settings will be auto-generated\n",
-    "    ))\n",
-    "\n",
-    "# Create a mapping of column names to their IDs for creating rows\n",
-    "column_map = {col.name: col.id for col in column_objects}\n",
-    "\n",
-    "# Create Row objects\n",
-    "row_objects = []\n",
-    "for row in test_data_rows:\n",
-    "    cells = []\n",
-    "    for key, value in row.items():\n",
-    "        if key in column_map:  # Skip any extra fields not in columns\n",
-    "            cells.append(RowCell(\n",
-    "                data=value,\n",
-    "                column_id=column_map[key]\n",
-    "            ))\n",
-    "    row_objects.append(Row(data=cells))\n",
-    "\n",
-    "# Now we can create the dataset\n",
-    "dataset = await client.create_dataset_with_data(\n",
-    "    project_id=TEST_PROJECT_ID,\n",
-    "    name=\"Capitals Dataset\",\n",
-    "    description=\"A dataset about capital cities\",\n",
-    "    columns=column_objects,\n",
-    "    rows=row_objects\n",
-    ")\n",
-    "\n",
-    "print(f\"Created dataset with ID: {dataset['id']}\")\n",
-    "\n",
-    "# Verify the data\n",
-    "columns = await client.list_dataset_columns(TEST_PROJECT_ID, dataset[\"id\"])\n",
-    "print(f\"Created {len(columns['items'])} columns\")\n",
-    "\n",
-    "rows = await client.list_dataset_rows(TEST_PROJECT_ID, dataset[\"id\"])\n",
-    "print(f\"Created {len(rows['items'])} rows\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'https://dev.app.ragas.io/dashboard/projects/e1b3f1e4-d344-48f4-a178-84e7e32e6ab6/datasets/5e7912f4-6a65-4d0c-bf79-0fab9ddda40c'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# get dataset url\n",
-    "url = f\"{WEB_ENDPOINT}/dashboard/projects/{TEST_PROJECT_ID}/datasets/{dataset['id']}\"\n",
-    "url"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# cleanup\n",
-    "await client.delete_dataset(TEST_PROJECT_ID, dataset[\"id\"])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### The same but for Experiments"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "#---- Experiment Columns ----\n",
-    "@patch\n",
-    "async def list_experiment_columns(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    experiment_id: str,\n",
-    "    limit: int = 50,\n",
-    "    offset: int = 0,\n",
-    "    order_by: t.Optional[str] = None,\n",
-    "    sort_dir: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"List columns in an experiment.\"\"\"\n",
-    "    params = {\"limit\": limit, \"offset\": offset}\n",
-    "    if order_by:\n",
-    "        params[\"order_by\"] = order_by\n",
-    "    if sort_dir:\n",
-    "        params[\"sort_dir\"] = sort_dir\n",
-    "    return await self._list_resources(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/columns\", **params\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def get_experiment_column(\n",
-    "    self: RagasApiClient, project_id: str, experiment_id: str, column_id: str\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Get a specific column in an experiment.\"\"\"\n",
-    "    return await self._get_resource(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}\"\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def create_experiment_column(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    experiment_id: str,\n",
-    "    id: str,\n",
-    "    name: str,\n",
-    "    type: str,\n",
-    "    col_order: t.Optional[int] = None,\n",
-    "    settings: t.Optional[t.Dict] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create a new column in an experiment.\"\"\"\n",
-    "    data = {\"id\": id, \"name\": name, \"type\": type}\n",
-    "    if col_order is not None:\n",
-    "        data[\"col_order\"] = col_order\n",
-    "    if settings:\n",
-    "        data[\"settings\"] = settings\n",
-    "    return await self._create_resource(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/columns\", data\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def update_experiment_column(\n",
-    "    self: RagasApiClient, project_id: str, experiment_id: str, column_id: str, **column_data\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Update an existing column in an experiment.\"\"\"\n",
-    "    return await self._update_resource(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}\",\n",
-    "        column_data,\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def delete_experiment_column(\n",
-    "    self: RagasApiClient, project_id: str, experiment_id: str, column_id: str\n",
-    ") -> None:\n",
-    "    \"\"\"Delete a column from an experiment.\"\"\"\n",
-    "    await self._delete_resource(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}\"\n",
-    "    )\n",
-    "\n",
-    "#---- Experiment Rows ----\n",
-    "@patch\n",
-    "async def list_experiment_rows(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    experiment_id: str,\n",
-    "    limit: int = 50,\n",
-    "    offset: int = 0,\n",
-    "    order_by: t.Optional[str] = None,\n",
-    "    sort_dir: t.Optional[str] = None,\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"List rows in an experiment.\"\"\"\n",
-    "    params = {\"limit\": limit, \"offset\": offset}\n",
-    "    if order_by:\n",
-    "        params[\"order_by\"] = order_by\n",
-    "    if sort_dir:\n",
-    "        params[\"sort_dir\"] = sort_dir\n",
-    "    return await self._list_resources(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/rows\", **params\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def get_experiment_row(\n",
-    "    self: RagasApiClient, project_id: str, experiment_id: str, row_id: str\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Get a specific row in an experiment.\"\"\"\n",
-    "    return await self._get_resource(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}\"\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def create_experiment_row(\n",
-    "    self: RagasApiClient, project_id: str, experiment_id: str, id: str, data: t.Dict\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create a new row in an experiment.\"\"\"\n",
-    "    row_data = {\"id\": id, \"data\": data}\n",
-    "    return await self._create_resource(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/rows\", row_data\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def update_experiment_row(\n",
-    "    self: RagasApiClient, project_id: str, experiment_id: str, row_id: str, data: t.Dict\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Update an existing row in an experiment.\"\"\"\n",
-    "    row_data = {\"data\": data}\n",
-    "    return await self._update_resource(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}\",\n",
-    "        row_data,\n",
-    "    )\n",
-    "\n",
-    "@patch\n",
-    "async def delete_experiment_row(\n",
-    "    self: RagasApiClient, project_id: str, experiment_id: str, row_id: str\n",
-    ") -> None:\n",
-    "    \"\"\"Delete a row from an experiment.\"\"\"\n",
-    "    await self._delete_resource(\n",
-    "        f\"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}\"\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': '7c695b58-7fc3-464c-a18b-a96e35f9684d',\n",
-       " 'name': 'New Experiment',\n",
-       " 'description': 'This is a new experiment',\n",
-       " 'updated_at': '2025-04-09T17:03:44.340782+00:00',\n",
-       " 'created_at': '2025-04-09T17:03:44.340782+00:00',\n",
-       " 'version_counter': 0,\n",
-       " 'project_id': 'e1b3f1e4-d344-48f4-a178-84e7e32e6ab6'}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await client.create_experiment(TEST_PROJECT_ID, \"New Experiment\", \"This is a new experiment\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'78fd6c58-7edf-4239-93d1-4f49185d8e49'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "experiments = await client.list_experiments(TEST_PROJECT_ID)\n",
-    "EXPERIMENT_ID = experiments[\"items\"][0][\"id\"]\n",
-    "EXPERIMENT_ID"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch\n",
-    "async def create_experiment_with_data(\n",
-    "    self: RagasApiClient,\n",
-    "    project_id: str,\n",
-    "    name: str,\n",
-    "    description: str,\n",
-    "    columns: t.List[Column],\n",
-    "    rows: t.List[Row],\n",
-    "    batch_size: int = 50\n",
-    ") -> t.Dict:\n",
-    "    \"\"\"Create an experiment with columns and rows.\n",
-    "    \n",
-    "    This method creates an experiment and populates it with columns and rows in an\n",
-    "    optimized way using concurrent requests.\n",
-    "    \n",
-    "    Args:\n",
-    "        project_id: Project ID\n",
-    "        name: Experiment name\n",
-    "        description: Experiment description\n",
-    "        columns: List of column definitions\n",
-    "        rows: List of row data\n",
-    "        batch_size: Number of operations to perform concurrently\n",
-    "        \n",
-    "    Returns:\n",
-    "        The created experiment\n",
-    "    \"\"\"\n",
-    "    return await self._create_with_data(\n",
-    "        \"experiment\", project_id, name, description, columns, rows, batch_size\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "#---- Utility Methods ----\n",
-    "@patch\n",
-    "def create_column(\n",
-    "    self: RagasApiClient, \n",
-    "    name: str, \n",
-    "    type: str, \n",
-    "    settings: t.Optional[t.Dict] = None, \n",
-    "    col_order: t.Optional[int] = None,\n",
-    "    id: t.Optional[str] = None\n",
-    ") -> Column:\n",
-    "    \"\"\"Create a Column object.\n",
-    "    \n",
-    "    Args:\n",
-    "        name: Column name\n",
-    "        type: Column type (use ColumnType enum)\n",
-    "        settings: Column settings\n",
-    "        col_order: Column order\n",
-    "        id: Custom ID (generates one if not provided)\n",
-    "        \n",
-    "    Returns:\n",
-    "        Column object\n",
-    "    \"\"\"\n",
-    "    params = {\"name\": name, \"type\": type}\n",
-    "    if settings:\n",
-    "        params[\"settings\"] = settings\n",
-    "    if col_order is not None:\n",
-    "        params[\"col_order\"] = col_order\n",
-    "    if id:\n",
-    "        params[\"id\"] = id\n",
-    "        \n",
-    "    return Column(**params)\n",
-    "    \n",
-    "@patch\n",
-    "def create_row(\n",
-    "    self: RagasApiClient, \n",
-    "    data: t.Dict[str, t.Any], \n",
-    "    column_map: t.Dict[str, str],\n",
-    "    id: t.Optional[str] = None\n",
-    ") -> Row:\n",
-    "    \"\"\"Create a Row object from a dictionary.\n",
-    "    \n",
-    "    Args:\n",
-    "        data: Dictionary mapping column names to values\n",
-    "        column_map: Dictionary mapping column names to column IDs\n",
-    "        id: Custom ID (generates one if not provided)\n",
-    "        \n",
-    "    Returns:\n",
-    "        Row object\n",
-    "    \"\"\"\n",
-    "    cells = []\n",
-    "    for col_name, value in data.items():\n",
-    "        if col_name in column_map:\n",
-    "            cells.append(RowCell(\n",
-    "                data=value,\n",
-    "                column_id=column_map[col_name]\n",
-    "            ))\n",
-    "            \n",
-    "    params = {\"data\": cells}\n",
-    "    if id:\n",
-    "        params[\"id\"] = id\n",
-    "        \n",
-    "    return Row(**params)\n",
-    "    \n",
-    "@patch\n",
-    "def create_column_map(self: RagasApiClient, columns: t.List[Column]) -> t.Dict[str, str]:\n",
-    "    \"\"\"Create a mapping of column names to IDs.\n",
-    "    \n",
-    "    Args:\n",
-    "        columns: List of column objects\n",
-    "        \n",
-    "    Returns:\n",
-    "        Dictionary mapping column names to IDs\n",
-    "    \"\"\"\n",
-    "    return {col.name: col.id for col in columns}\n",
-    "    \n",
-    "@patch\n",
-    "async def convert_raw_data(\n",
-    "    self: RagasApiClient,\n",
-    "    column_defs: t.List[t.Dict],\n",
-    "    row_data: t.List[t.Dict]\n",
-    ") -> t.Tuple[t.List[Column], t.List[Row]]:\n",
-    "    \"\"\"Convert raw data to column and row objects.\n",
-    "    \n",
-    "    Args:\n",
-    "        column_defs: List of column definitions (dicts with name, type)\n",
-    "        row_data: List of dictionaries with row data\n",
-    "        \n",
-    "    Returns:\n",
-    "        Tuple of (columns, rows)\n",
-    "    \"\"\"\n",
-    "    # Create columns\n",
-    "    columns = []\n",
-    "    for col in column_defs:\n",
-    "        columns.append(self.create_column(**col))\n",
-    "        \n",
-    "    # Create column map\n",
-    "    column_map = self.create_column_map(columns)\n",
-    "    \n",
-    "    # Create rows\n",
-    "    rows = []\n",
-    "    for data in row_data:\n",
-    "        rows.append(self.create_row(data, column_map))\n",
-    "        \n",
-    "    return columns, rows"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/dataset.ipynb b/experimental/old_nbs/api/dataset.ipynb
deleted file mode 100644
index a0cdd5f47..000000000
--- a/experimental/old_nbs/api/dataset.ipynb
+++ /dev/null
@@ -1,1552 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Dataset\n",
-    "\n",
-    "> A python list like object that contains your evaluation data."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jjmachan/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "# | export\n",
-    "from abc import ABC, abstractmethod\n",
-    "import os\n",
-    "import typing as t\n",
-    "import csv\n",
-    "import uuid\n",
-    "\n",
-    "from fastcore.utils import patch\n",
-    "import pandas as pd\n",
-    "\n",
-    "from ragas_experimental.model.pydantic_model import ExtendedPydanticBaseModel as BaseModel\n",
-    "from ragas_experimental.utils import create_nano_id, async_to_sync, get_test_directory\n",
-    "from ragas_experimental.backends.ragas_api_client import RagasApiClient\n",
-    "from ragas_experimental.typing import SUPPORTED_BACKENDS\n",
-    "import ragas_experimental.typing as rt\n",
-    "from ragas_experimental.metric import MetricResult"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "BaseModelType = t.TypeVar(\"BaseModelType\", bound=BaseModel)\n",
-    "\n",
-    "class DatasetBackend(ABC):\n",
-    "    \"\"\"Abstract base class for dataset backends.\n",
-    "    \n",
-    "    All dataset storage backends must implement these methods.\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def initialize(self, dataset): \n",
-    "        \"\"\"Initialize the backend with dataset information\"\"\"\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def get_column_mapping(self, model): \n",
-    "        \"\"\"Get mapping between model fields and backend columns\"\"\"\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def load_entries(self, model_class): \n",
-    "        \"\"\"Load all entries from storage\"\"\"\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def append_entry(self, entry): \n",
-    "        \"\"\"Add a new entry to storage and return its ID\"\"\"\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def update_entry(self, entry): \n",
-    "        \"\"\"Update an existing entry in storage\"\"\"\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def delete_entry(self, entry_id): \n",
-    "        \"\"\"Delete an entry from storage\"\"\"\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def get_entry_by_field(self, field_name: str, field_value: t.Any, model_class):\n",
-    "        \"\"\"Get an entry by field value\"\"\"\n",
-    "        pass\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class RagasAppBackend(DatasetBackend):\n",
-    "    \"\"\"Backend for storing datasets using the Ragas API.\"\"\"\n",
-    "    \n",
-    "    def __init__(self, ragas_api_client, project_id, dataset_id):\n",
-    "        \"\"\"Initialize the RagasAppBackend.\n",
-    "        \n",
-    "        Args:\n",
-    "            ragas_api_client: The RagasApiClient instance\n",
-    "            project_id: The ID of the project\n",
-    "            dataset_id: The ID of the dataset\n",
-    "        \"\"\"\n",
-    "        self.ragas_api_client = ragas_api_client\n",
-    "        self.project_id = project_id\n",
-    "        self.dataset_id = dataset_id\n",
-    "        self.dataset = None\n",
-    "\n",
-    "    def __str__(self):\n",
-    "        return f\"RagasAppBackend(project_id={self.project_id}, dataset_id={self.dataset_id})\"\n",
-    "\n",
-    "    def __repr__(self):\n",
-    "        return self.__str__()\n",
-    "        \n",
-    "    def initialize(self, dataset):\n",
-    "        \"\"\"Initialize the backend with the dataset instance.\"\"\"\n",
-    "        self.dataset = dataset\n",
-    "        \n",
-    "    def get_column_mapping(self, model):\n",
-    "        \"\"\"Get mapping between model fields and backend columns.\"\"\"\n",
-    "        sync_func = async_to_sync(self.ragas_api_client.list_dataset_columns)\n",
-    "        columns = sync_func(project_id=self.project_id, dataset_id=self.dataset_id)\n",
-    "        column_id_map = {column[\"name\"]: column[\"id\"] for column in columns[\"items\"]}\n",
-    "        \n",
-    "        # Update the model's column mapping with the values from the API\n",
-    "        column_mapping = {}\n",
-    "        for field_name in model.__annotations__:\n",
-    "            if field_name in column_id_map:\n",
-    "                column_mapping[field_name] = column_id_map[field_name]\n",
-    "        \n",
-    "        return column_mapping\n",
-    "    \n",
-    "    def load_entries(self, model_class):\n",
-    "        \"\"\"Load all entries from the API.\"\"\"\n",
-    "        # Get all rows\n",
-    "        sync_func = async_to_sync(self.ragas_api_client.list_dataset_rows)\n",
-    "        response = sync_func(\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_id=self.dataset_id\n",
-    "        )\n",
-    "        \n",
-    "        # Get column mapping (ID -> name)\n",
-    "        column_map = {v: k for k, v in model_class.__column_mapping__.items()}\n",
-    "        \n",
-    "        # Process rows\n",
-    "        entries = []\n",
-    "        for row in response.get(\"items\", []):\n",
-    "            model_data = {}\n",
-    "            row_id = row.get(\"id\")\n",
-    "            \n",
-    "            # Convert from API data format to model fields\n",
-    "            for col_id, value in row.get(\"data\", {}).items():\n",
-    "                if col_id in column_map:\n",
-    "                    field_name = column_map[col_id]\n",
-    "                    model_data[field_name] = value\n",
-    "            \n",
-    "            # Create model instance\n",
-    "            entry = model_class(**model_data)\n",
-    "            \n",
-    "            # Store row ID for future operations\n",
-    "            entry._row_id = row_id\n",
-    "            \n",
-    "            entries.append(entry)\n",
-    "            \n",
-    "        return entries\n",
-    "    \n",
-    "    def append_entry(self, entry):\n",
-    "        \"\"\"Add a new entry to the API and return its ID.\"\"\"\n",
-    "        import ragas_experimental.typing as rt\n",
-    "        \n",
-    "        # Get column mapping\n",
-    "        column_id_map = entry.__class__.__column_mapping__\n",
-    "        \n",
-    "        # Create row data\n",
-    "        row_dict_converted = rt.ModelConverter.instance_to_row(entry)\n",
-    "        row_id = create_nano_id()\n",
-    "        row_data = {}\n",
-    "        \n",
-    "        for column in row_dict_converted[\"data\"]:\n",
-    "            if column[\"column_id\"] in column_id_map:\n",
-    "                row_data[column_id_map[column[\"column_id\"]]] = column[\"data\"]\n",
-    "        \n",
-    "        # Create row in API\n",
-    "        sync_func = async_to_sync(self.ragas_api_client.create_dataset_row)\n",
-    "        response = sync_func(\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_id=self.dataset_id,\n",
-    "            id=row_id,\n",
-    "            data=row_data,\n",
-    "        )\n",
-    "        \n",
-    "        # Return the row ID\n",
-    "        return response[\"id\"]\n",
-    "    \n",
-    "    def update_entry(self, entry):\n",
-    "        \"\"\"Update an existing entry in the API.\"\"\"\n",
-    "        import ragas_experimental.typing as rt\n",
-    "        \n",
-    "        # Get the row ID\n",
-    "        row_id = None\n",
-    "        if hasattr(entry, \"_row_id\") and entry._row_id:\n",
-    "            row_id = entry._row_id\n",
-    "        else:\n",
-    "            raise ValueError(\"Cannot update: entry has no row ID\")\n",
-    "        \n",
-    "        # Get column mapping and prepare data\n",
-    "        column_id_map = entry.__class__.__column_mapping__\n",
-    "        row_dict = rt.ModelConverter.instance_to_row(entry)[\"data\"]\n",
-    "        row_data = {}\n",
-    "        \n",
-    "        for column in row_dict:\n",
-    "            if column[\"column_id\"] in column_id_map:\n",
-    "                row_data[column_id_map[column[\"column_id\"]]] = column[\"data\"]\n",
-    "        \n",
-    "        # Update in API\n",
-    "        sync_func = async_to_sync(self.ragas_api_client.update_dataset_row)\n",
-    "        response = sync_func(\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_id=self.dataset_id,\n",
-    "            row_id=row_id,\n",
-    "            data=row_data,\n",
-    "        )\n",
-    "        \n",
-    "        return response\n",
-    "    \n",
-    "    def delete_entry(self, entry_id):\n",
-    "        \"\"\"Delete an entry from the API.\"\"\"\n",
-    "        # Delete the row\n",
-    "        sync_func = async_to_sync(self.ragas_api_client.delete_dataset_row)\n",
-    "        response = sync_func(\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_id=self.dataset_id,\n",
-    "            row_id=entry_id\n",
-    "        )\n",
-    "        \n",
-    "        return response\n",
-    "    \n",
-    "    def get_entry_by_field(self, field_name, field_value, model_class):\n",
-    "        \"\"\"Get an entry by field value.\"\"\"\n",
-    "        # We don't have direct filtering in the API, so load all and filter\n",
-    "        entries = self.load_entries(model_class)\n",
-    "        \n",
-    "        # Search for matching entry\n",
-    "        for entry in entries:\n",
-    "            if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:\n",
-    "                return entry\n",
-    "                \n",
-    "        return None\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class LocalBackend(DatasetBackend):\n",
-    "    def __init__(self, local_root_dir, project_id, dataset_id, dataset_name, type: t.Literal[\"datasets\", \"experiments\"]):\n",
-    "        \"\"\"Initialize the LocalBackend.\n",
-    "        \n",
-    "        Args:\n",
-    "            local_root_dir: The root directory for all projects\n",
-    "            project_id: The ID of the project\n",
-    "            dataset_id: The ID of the dataset\n",
-    "            dataset_name: The name of the dataset\n",
-    "        \"\"\"\n",
-    "        self.local_root_dir = local_root_dir\n",
-    "        self.project_id = project_id\n",
-    "        self.dataset_id = dataset_id\n",
-    "        self.dataset_name = dataset_name\n",
-    "        self.dataset = None\n",
-    "        self.type = type\n",
-    "\n",
-    "    def __str__(self):\n",
-    "        return f\"LocalBackend(local_root_dir={self.local_root_dir}, project_id={self.project_id}, dataset_id={self.dataset_id}, dataset_name={self.dataset_name})\"\n",
-    "\n",
-    "    def __repr__(self):\n",
-    "        return self.__str__()\n",
-    "        \n",
-    "    def initialize(self, dataset):\n",
-    "        \"\"\"Initialize the backend with the dataset instance.\"\"\"\n",
-    "        self.dataset = dataset\n",
-    "        \n",
-    "        # Ensure CSV file exists\n",
-    "        self._ensure_csv_exists()\n",
-    "        \n",
-    "    def _ensure_csv_exists(self):\n",
-    "        \"\"\"Create the CSV file if it doesn't exist.\"\"\"\n",
-    "        csv_path = self._get_csv_path()\n",
-    "        \n",
-    "        # Create directories if needed\n",
-    "        os.makedirs(os.path.dirname(csv_path), exist_ok=True)\n",
-    "        \n",
-    "        # Create file with headers if it doesn't exist\n",
-    "        if not os.path.exists(csv_path):\n",
-    "            # Include _row_id in the headers\n",
-    "            field_names = [\"_row_id\"] + list(self.dataset.model.__annotations__.keys())\n",
-    "            \n",
-    "            with open(csv_path, 'w', newline='') as f:\n",
-    "                writer = csv.writer(f)\n",
-    "                writer.writerow(field_names)\n",
-    "    \n",
-    "    def _get_csv_path(self):\n",
-    "        \"\"\"Get the path to the CSV file.\"\"\"\n",
-    "        return os.path.join(\n",
-    "            self.local_root_dir, \n",
-    "            self.project_id, \n",
-    "            self.type,\n",
-    "            f\"{self.dataset_name}.csv\"\n",
-    "        )\n",
-    "        \n",
-    "    def get_column_mapping(self, model) -> t.Dict:\n",
-    "        \"\"\"Get mapping between model fields and CSV columns.\n",
-    "        \n",
-    "        For CSV, column names directly match field names.\n",
-    "        \"\"\"\n",
-    "        # Simple dictionary comprehension\n",
-    "        return model.model_fields\n",
-    "    \n",
-    "    def load_entries(self, model_class):\n",
-    "        \"\"\"Load all entries from the CSV file.\"\"\"\n",
-    "        csv_path = self._get_csv_path()\n",
-    "        \n",
-    "        if not os.path.exists(csv_path):\n",
-    "            return []\n",
-    "            \n",
-    "        entries = []\n",
-    "        \n",
-    "        with open(csv_path, 'r', newline='') as f:\n",
-    "            reader = csv.DictReader(f)\n",
-    "            \n",
-    "            for row in reader:\n",
-    "                try:\n",
-    "                    # Extract row_id and remove from model data\n",
-    "                    row_id = row.get(\"_row_id\", str(uuid.uuid4()))\n",
-    "                    \n",
-    "                    # Create a copy without _row_id for model instantiation\n",
-    "                    model_data = {k: v for k, v in row.items() if k != \"_row_id\"}\n",
-    "                    \n",
-    "                    # Convert types as needed\n",
-    "                    typed_row = {}\n",
-    "                    for field, value in model_data.items():\n",
-    "                        if field in model_class.model_fields:\n",
-    "                            field_type = model_class.model_fields[field].annotation\n",
-    "                            \n",
-    "                            # Handle basic type conversions\n",
-    "                            if field_type == int:\n",
-    "                                typed_row[field] = int(value) if value else 0\n",
-    "                            elif field_type == float:\n",
-    "                                typed_row[field] = float(value) if value else 0.0\n",
-    "                            elif field_type == bool:\n",
-    "                                typed_row[field] = value.lower() in ('true', 't', 'yes', 'y', '1')\n",
-    "                            else:\n",
-    "                                typed_row[field] = value\n",
-    "                    \n",
-    "                    # Create model instance\n",
-    "                    entry = model_class(**typed_row)\n",
-    "                    \n",
-    "                    # Set the row ID from CSV (or use UUID if not available)\n",
-    "                    entry._row_id = row_id\n",
-    "                    \n",
-    "                    entries.append(entry)\n",
-    "                except Exception as e:\n",
-    "                    print(f\"Error loading row from CSV: {e}\")\n",
-    "        \n",
-    "        return entries\n",
-    "    \n",
-    "    def append_entry(self, entry):\n",
-    "        \"\"\"Add a new entry to the CSV file and return a generated ID.\"\"\"\n",
-    "        csv_path = self._get_csv_path()\n",
-    "        \n",
-    "        # Read existing rows to avoid overwriting\n",
-    "        existing_rows = []\n",
-    "        if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:\n",
-    "            with open(csv_path, 'r', newline='') as f:\n",
-    "                reader = csv.DictReader(f)\n",
-    "                existing_rows = list(reader)\n",
-    "        \n",
-    "        # Generate a row ID if needed\n",
-    "        row_id = getattr(entry, \"_row_id\", None) or str(uuid.uuid4())\n",
-    "        \n",
-    "        # Get field names including row_id\n",
-    "        field_names = [\"_row_id\"] + list(entry.model_fields.keys())\n",
-    "        \n",
-    "        # Convert entry to dict\n",
-    "        entry_dict = entry.model_dump()\n",
-    "        \n",
-    "        # Add row_id to the dict\n",
-    "        entry_dict[\"_row_id\"] = row_id\n",
-    "        \n",
-    "        # Write all rows back with the new entry\n",
-    "        with open(csv_path, 'w', newline='') as f:\n",
-    "            writer = csv.DictWriter(f, fieldnames=field_names)\n",
-    "            writer.writeheader()\n",
-    "            \n",
-    "            # Write existing rows\n",
-    "            for row in existing_rows:\n",
-    "                writer.writerow(row)\n",
-    "                \n",
-    "            # Write new row\n",
-    "            writer.writerow(entry_dict)\n",
-    "        \n",
-    "        # Return the row ID\n",
-    "        return row_id\n",
-    "    \n",
-    "    def update_entry(self, entry):\n",
-    "        \"\"\"Update an existing entry in the CSV file.\n",
-    "        \n",
-    "        Since CSV files don't support in-place updates, we need to\n",
-    "        rewrite the entire file.\n",
-    "        \"\"\"\n",
-    "        # Create a copy of entries to modify\n",
-    "        entries_to_save = list(self.dataset._entries)  # Make a copy\n",
-    "        \n",
-    "        # Find the entry to update\n",
-    "        updated = False\n",
-    "        for i, e in enumerate(entries_to_save):\n",
-    "            if hasattr(e, \"_row_id\") and hasattr(entry, \"_row_id\") and e._row_id == entry._row_id:\n",
-    "                # Update the entry in our copy\n",
-    "                entries_to_save[i] = entry\n",
-    "                updated = True\n",
-    "                break\n",
-    "        \n",
-    "        # If entry wasn't found, just append it\n",
-    "        if not updated and entries_to_save:\n",
-    "            entries_to_save.append(entry)\n",
-    "        \n",
-    "        # Write all entries back to CSV\n",
-    "        self._write_entries_to_csv(entries_to_save)\n",
-    "        \n",
-    "        return True\n",
-    "    \n",
-    "    def delete_entry(self, entry_id):\n",
-    "        \"\"\"Delete an entry from the CSV file.\n",
-    "        \n",
-    "        This method should NOT modify self.dataset._entries directly.\n",
-    "        Dataset.pop() handles that separately.\n",
-    "        \"\"\"\n",
-    "        # Create a copy of entries to modify, excluding the one to delete\n",
-    "        entries_to_save = []\n",
-    "        for e in self.dataset._entries:\n",
-    "            if not (hasattr(e, \"_row_id\") and e._row_id == entry_id):\n",
-    "                entries_to_save.append(e)\n",
-    "        \n",
-    "        # Write all entries back to CSV\n",
-    "        self._write_entries_to_csv(entries_to_save)\n",
-    "        \n",
-    "        return True\n",
-    "    \n",
-    "    def _write_entries_to_csv(self, entries):\n",
-    "        \"\"\"Write all entries to the CSV file.\"\"\"\n",
-    "        csv_path = self._get_csv_path()\n",
-    "        \n",
-    "        if not entries:\n",
-    "            # If no entries, just create an empty CSV with headers\n",
-    "            field_names = [\"_row_id\"] + list(self.dataset.model.model_fields.keys())\n",
-    "            with open(csv_path, 'w', newline='') as f:\n",
-    "                writer = csv.DictWriter(f, fieldnames=field_names)\n",
-    "                writer.writeheader()\n",
-    "            return\n",
-    "            \n",
-    "        # Get field names including _row_id\n",
-    "        field_names = [\"_row_id\"] + list(entries[0].__class__.model_fields.keys())\n",
-    "        \n",
-    "        # Write all entries\n",
-    "        with open(csv_path, 'w', newline='') as f:\n",
-    "            writer = csv.DictWriter(f, fieldnames=field_names)\n",
-    "            writer.writeheader()\n",
-    "            \n",
-    "            for entry in entries:\n",
-    "                # Create a dict with model data + row_id\n",
-    "                entry_dict = entry.model_dump()\n",
-    "                entry_dict[\"_row_id\"] = getattr(entry, \"_row_id\", str(uuid.uuid4()))\n",
-    "                \n",
-    "                writer.writerow(entry_dict)\n",
-    "    \n",
-    "    def get_entry_by_field(self, field_name, field_value, model_class):\n",
-    "        \"\"\"Get an entry by field value.\"\"\"\n",
-    "        entries = self.load_entries(model_class)\n",
-    "        \n",
-    "        for entry in entries:\n",
-    "            if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:\n",
-    "                return entry\n",
-    "                \n",
-    "        return None"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "def create_dataset_backend(backend_type: SUPPORTED_BACKENDS, **kwargs):\n",
-    "    \"\"\"Factory function to create the appropriate backend.\n",
-    "    \n",
-    "    Args:\n",
-    "        backend_type: The type of backend to create (ragas_app or local)\n",
-    "        **kwargs: Arguments specific to the backend\n",
-    "        \n",
-    "    Returns:\n",
-    "        DatasetBackend: An instance of the requested backend\n",
-    "    \"\"\"\n",
-    "    backend_classes = {\n",
-    "        \"ragas_app\": RagasAppBackend,\n",
-    "        \"local\": LocalBackend,\n",
-    "    }\n",
-    "    \n",
-    "    if backend_type not in backend_classes:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend_type}\")\n",
-    "        \n",
-    "    return backend_classes[backend_type](**kwargs)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "LocalBackend(local_root_dir=/var/folders/2y/02fp70k56p75ldrkgtx7z10r0000gn/T/ragas_test_dPmBLc9qLgdj, project_id=test_project, dataset_id=test_dataset, dataset_name=test_dataset)\n"
-     ]
-    }
-   ],
-   "source": [
-    "temp_dir = get_test_directory()\n",
-    "backend = create_dataset_backend(\"local\", local_root_dir=temp_dir, project_id=\"test_project\", dataset_id=\"test_dataset\", dataset_name=\"test_dataset\", type=\"dataset\")\n",
-    "print(backend)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "class Dataset(t.Generic[BaseModelType]):\n",
-    "    \"\"\"A list-like interface for managing dataset entries with backend synchronization.\n",
-    "    \n",
-    "    This class behaves like a Python list while synchronizing operations with the\n",
-    "    chosen backend (Ragas API or local filesystem).\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        name: str,\n",
-    "        model: t.Type[BaseModel],\n",
-    "        project_id: str,\n",
-    "        dataset_id: str,\n",
-    "        datatable_type: t.Literal[\"datasets\", \"experiments\"],\n",
-    "        ragas_api_client: t.Optional[RagasApiClient] = None,\n",
-    "        backend: SUPPORTED_BACKENDS = \"local\",\n",
-    "        local_root_dir: t.Optional[str] = None,\n",
-    "    ):\n",
-    "        \"\"\"Initialize a Dataset with the specified backend.\n",
-    "        \n",
-    "        Args:\n",
-    "            name: The name of the dataset\n",
-    "            model: The Pydantic model class for entries\n",
-    "            project_id: The ID of the parent project\n",
-    "            dataset_id: The ID of this dataset\n",
-    "            ragas_api_client: Required for ragas_app backend\n",
-    "            backend: The storage backend to use (ragas_app or local)\n",
-    "            local_root_dir: Required for local backend\n",
-    "        \"\"\"\n",
-    "        # Store basic properties\n",
-    "        self.name = name\n",
-    "        self.model = model\n",
-    "        self.project_id = project_id\n",
-    "        self.dataset_id = dataset_id\n",
-    "        self.backend_type = backend\n",
-    "        self.datatable_type = datatable_type\n",
-    "        self._entries: t.List[BaseModelType] = []\n",
-    "\n",
-    "        # Create the appropriate backend\n",
-    "        backend_params = {}\n",
-    "        if backend == \"ragas_app\":\n",
-    "            if ragas_api_client is None:\n",
-    "                raise ValueError(\"ragas_api_client is required for ragas_app backend\")\n",
-    "            backend_params = {\n",
-    "                \"ragas_api_client\": ragas_api_client,\n",
-    "                \"project_id\": project_id,\n",
-    "                \"dataset_id\": dataset_id\n",
-    "            }\n",
-    "        elif backend == \"local\":\n",
-    "            if local_root_dir is None:\n",
-    "                raise ValueError(\"local_root_dir is required for local backend\")\n",
-    "            backend_params = {\n",
-    "                \"local_root_dir\": local_root_dir,\n",
-    "                \"project_id\": project_id,\n",
-    "                \"dataset_id\": dataset_id,\n",
-    "                \"dataset_name\": name,\n",
-    "                \"type\": self.datatable_type\n",
-    "            }\n",
-    "            \n",
-    "        self._backend = create_dataset_backend(backend, **backend_params)\n",
-    "        \n",
-    "        # Initialize the backend with this dataset\n",
-    "        self._backend.initialize(self)\n",
-    "        \n",
-    "        # Initialize column mapping if it doesn't exist yet\n",
-    "        if not hasattr(self.model, \"__column_mapping__\"):\n",
-    "            self.model.__column_mapping__ = {}\n",
-    "            \n",
-    "        # Get column mappings from backend and update the model's mapping\n",
-    "        column_mapping = self._backend.get_column_mapping(model)\n",
-    "        \n",
-    "        # Update the model's column mapping\n",
-    "        for field_name, column_id in column_mapping.items():\n",
-    "            self.model.__column_mapping__[field_name] = column_id\n",
-    "\n",
-    "    def __getitem__(\n",
-    "        self, key: t.Union[int, slice]\n",
-    "    ) -> t.Union[BaseModelType, \"Dataset[BaseModelType]\"]:\n",
-    "        \"\"\"Get an entry by index or slice.\"\"\"\n",
-    "        if isinstance(key, slice):\n",
-    "            # Create a new dataset with the sliced entries\n",
-    "            new_dataset = type(self)(\n",
-    "                name=self.name,\n",
-    "                model=self.model,\n",
-    "                project_id=self.project_id,\n",
-    "                dataset_id=self.dataset_id,\n",
-    "                backend=self.backend_type,\n",
-    "                datatable_type=self.datatable_type\n",
-    "            )\n",
-    "            # Copy the backend reference\n",
-    "            new_dataset._backend = self._backend\n",
-    "            # Set the entries to the sliced entries\n",
-    "            new_dataset._entries = self._entries[key]\n",
-    "            return new_dataset\n",
-    "        else:\n",
-    "            return self._entries[key]\n",
-    "\n",
-    "    def __setitem__(self, index: int, entry: BaseModelType) -> None:\n",
-    "        \"\"\"Update an entry at the given index and sync to backend.\"\"\"\n",
-    "        if not isinstance(entry, self.model):\n",
-    "            raise TypeError(f\"Entry must be an instance of {self.model.__name__}\")\n",
-    "\n",
-    "        # Get existing entry to get its ID\n",
-    "        existing = self._entries[index]\n",
-    "        if hasattr(existing, \"_row_id\") and existing._row_id:\n",
-    "            entry._row_id = existing._row_id\n",
-    "        \n",
-    "        # Update in backend\n",
-    "        self._backend.update_entry(entry)\n",
-    "        \n",
-    "        # Update local cache\n",
-    "        self._entries[index] = entry\n",
-    "\n",
-    "    def __repr__(self) -> str:\n",
-    "        \"\"\"String representation of the dataset.\"\"\"\n",
-    "        return f\"Dataset(name='{self.name}', model={self.model.__name__}, len={len(self)})\"\n",
-    "\n",
-    "    def __len__(self) -> int:\n",
-    "        \"\"\"Get the number of entries in the dataset.\"\"\"\n",
-    "        return len(self._entries)\n",
-    "\n",
-    "    def __iter__(self) -> t.Iterator[BaseModelType]:\n",
-    "        \"\"\"Iterate over the entries in the dataset.\"\"\"\n",
-    "        return iter(self._entries)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# test model\n",
-    "class DatasetModel(BaseModel):\n",
-    "    id: int\n",
-    "    name: str\n",
-    "    description: str\n",
-    "\n",
-    "class ExperimentModel(DatasetModel):\n",
-    "    tags: t.Literal[\"tag1\", \"tag2\", \"tag3\"]\n",
-    "    result: MetricResult"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "DatasetModel(id=0, name='test', description='test description')"
-      ]
-     },
-     "execution_count": 10,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_instance = DatasetModel(\n",
-    "    id=0, \n",
-    "    name=\"test\", \n",
-    "    description=\"test description\", \n",
-    ")\n",
-    "dataset_instance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "ExperimentModel(id=0, name='test', description='test description', tags='tag1', result=0.5)"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "experiment_instance = ExperimentModel(\n",
-    "    **dataset_instance.model_dump(),\n",
-    "    tags=\"tag1\",\n",
-    "    result=MetricResult(result=0.5, reason=\"test reason\"),\n",
-    ")\n",
-    "experiment_instance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental import Project"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tmp_root_dir = get_test_directory()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "p = Project.create(name=\"test_project\", backend=\"local\", root_dir=tmp_root_dir)\n",
-    "dataset_with_dataset_model = p.create_dataset(name=\"dataset_with_dataset_model\", model=DatasetModel)\n",
-    "dataset_with_experiment_model = p.create_dataset(name=\"dataset_with_experiment_model\", model=ExperimentModel)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "ExperimentModel(id=0, name='test', description='test description', tags='tag1', result=0.5)"
-      ]
-     },
-     "execution_count": 15,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "experiment_instance = ExperimentModel(\n",
-    "    **dataset_instance.model_dump(),\n",
-    "    tags=\"tag1\",\n",
-    "    result=MetricResult(result=0.5, reason=\"test reason\"),\n",
-    ")\n",
-    "experiment_instance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def append(self: Dataset, entry: BaseModelType) -> None:\n",
-    "    \"\"\"Add a new entry to the dataset and sync to backend.\n",
-    "    \n",
-    "    Args:\n",
-    "        entry: The entry to add to the dataset\n",
-    "    \"\"\"\n",
-    "    if not isinstance(entry, self.model):\n",
-    "        raise TypeError(f\"Entry must be an instance of {self.model.__name__}\")\n",
-    "        \n",
-    "    # Add to backend and get ID\n",
-    "    row_id = self._backend.append_entry(entry)\n",
-    "    \n",
-    "    # Store the ID\n",
-    "    entry._row_id = row_id\n",
-    "    \n",
-    "    # Add to local cache\n",
-    "    self._entries.append(entry)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(1, 1)"
-      ]
-     },
-     "execution_count": 17,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_with_dataset_model.append(dataset_instance)\n",
-    "dataset_with_experiment_model.append(experiment_instance)\n",
-    "len(dataset_with_dataset_model), len(dataset_with_experiment_model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from fastcore.test import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | hide\n",
-    "test_eq(len(dataset_with_dataset_model), 1)\n",
-    "test_eq(len(dataset_with_experiment_model), 1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def pop(self: Dataset, index: int = -1) -> BaseModelType:\n",
-    "    \"\"\"Remove and return entry at index, sync deletion to backend.\n",
-    "    \n",
-    "    Args:\n",
-    "        index: The index of the entry to remove (default: -1, the last entry)\n",
-    "        \n",
-    "    Returns:\n",
-    "        The removed entry\n",
-    "    \"\"\"\n",
-    "    # Get the entry\n",
-    "    entry = self._entries[index]\n",
-    "    \n",
-    "    # Get the row ID\n",
-    "    row_id = getattr(entry, \"_row_id\", None)\n",
-    "    if row_id is None:\n",
-    "        raise ValueError(\"Entry has no row ID. This likely means it was not added or synced to the dataset.\")\n",
-    "    \n",
-    "    # Delete from backend\n",
-    "    self._backend.delete_entry(row_id)\n",
-    "    \n",
-    "    # Remove from local cache\n",
-    "    return self._entries.pop(index)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(0, 0)"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_with_dataset_model.pop()\n",
-    "dataset_with_experiment_model.pop()\n",
-    "len(dataset_with_dataset_model), len(dataset_with_experiment_model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | hide\n",
-    "test_eq(len(dataset_with_dataset_model), 0)\n",
-    "test_eq(len(dataset_with_experiment_model), 0)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# now add some more entries\n",
-    "for i in range(10):\n",
-    "    dataset_with_dataset_model.append(dataset_instance)\n",
-    "    dataset_with_experiment_model.append(experiment_instance)\n",
-    "\n",
-    "test_eq(len(dataset_with_dataset_model), 10)\n",
-    "test_eq(len(dataset_with_experiment_model), 10)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def load(self: Dataset) -> None:\n",
-    "    \"\"\"Load all entries from the backend.\"\"\"\n",
-    "    # Get entries from backend\n",
-    "    self._entries = self._backend.load_entries(self.model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset_with_dataset_model.load()\n",
-    "dataset_with_experiment_model.load()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def load_as_dicts(self: Dataset) -> t.List[t.Dict]:\n",
-    "    \"\"\"Load all entries as dictionaries.\n",
-    "    \n",
-    "    Returns:\n",
-    "        List of dictionaries representing the entries\n",
-    "    \"\"\"\n",
-    "    # Make sure we have entries\n",
-    "    if not self._entries:\n",
-    "        self.load()\n",
-    "    \n",
-    "    # Convert to dictionaries\n",
-    "    return [entry.model_dump() for entry in self._entries]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[{'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'},\n",
-       " {'id': 0, 'name': 'test', 'description': 'test description'}]"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_with_dataset_model.load_as_dicts()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[{'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'}]"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_with_experiment_model.load_as_dicts()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def to_pandas(self: Dataset) -> \"pd.DataFrame\":\n",
-    "    \"\"\"Convert dataset to pandas DataFrame.\"\"\"\n",
-    "\n",
-    "    # Make sure we have data\n",
-    "    if not self._entries:\n",
-    "        self.load()\n",
-    "    \n",
-    "    # Convert entries to dictionaries\n",
-    "    data = [entry.model_dump() for entry in self._entries]\n",
-    "    return pd.DataFrame(data)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>name</th>\n",
-       "      <th>description</th>\n",
-       "      <th>tags</th>\n",
-       "      <th>result</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>0</td>\n",
-       "      <td>test</td>\n",
-       "      <td>test description</td>\n",
-       "      <td>tag1</td>\n",
-       "      <td>(0, ., 5)</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "   id  name       description  tags     result\n",
-       "0   0  test  test description  tag1  (0, ., 5)\n",
-       "1   0  test  test description  tag1  (0, ., 5)\n",
-       "2   0  test  test description  tag1  (0, ., 5)\n",
-       "3   0  test  test description  tag1  (0, ., 5)\n",
-       "4   0  test  test description  tag1  (0, ., 5)\n",
-       "5   0  test  test description  tag1  (0, ., 5)\n",
-       "6   0  test  test description  tag1  (0, ., 5)\n",
-       "7   0  test  test description  tag1  (0, ., 5)\n",
-       "8   0  test  test description  tag1  (0, ., 5)\n",
-       "9   0  test  test description  tag1  (0, ., 5)"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_with_experiment_model.to_pandas()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def save(self: Dataset, item: BaseModelType) -> None:\n",
-    "    \"\"\"Save changes to an item to the backend.\n",
-    "    \n",
-    "    Args:\n",
-    "        item: The item to save\n",
-    "    \"\"\"\n",
-    "    if not isinstance(item, self.model):\n",
-    "        raise TypeError(f\"Item must be an instance of {self.model.__name__}\")\n",
-    "    \n",
-    "    # Check if the item has a row ID\n",
-    "    if not hasattr(item, \"_row_id\") or not item._row_id:\n",
-    "        # Try to find it in our entries by matching\n",
-    "        for i, entry in enumerate(self._entries):\n",
-    "            if id(entry) == id(item):  # Check if it's the same object\n",
-    "                if hasattr(entry, \"_row_id\") and entry._row_id:\n",
-    "                    item._row_id = entry._row_id\n",
-    "                    break\n",
-    "    \n",
-    "    if not hasattr(item, \"_row_id\") or not item._row_id:\n",
-    "        raise ValueError(\"Cannot save: item is not from this dataset or was not properly synced\")\n",
-    "    \n",
-    "    # Update in backend\n",
-    "    self._backend.update_entry(item)\n",
-    "    \n",
-    "    # Update in local cache if needed\n",
-    "    self._update_local_entry(item)\n",
-    "    \n",
-    "@patch\n",
-    "def _update_local_entry(self: Dataset, item: BaseModelType) -> None:\n",
-    "    \"\"\"Update an entry in the local cache.\n",
-    "    \n",
-    "    Args:\n",
-    "        item: The item to update\n",
-    "    \"\"\"\n",
-    "    for i, entry in enumerate(self._entries):\n",
-    "        if hasattr(entry, \"_row_id\") and hasattr(item, \"_row_id\") and entry._row_id == item._row_id:\n",
-    "            # If it's not the same object, update our copy\n",
-    "            if id(entry) != id(item):\n",
-    "                self._entries[i] = item\n",
-    "            break"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "ExperimentModel(id=0, name='test', description='test description', tags='tag1', result='0.5')"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "d = dataset_with_experiment_model[0]\n",
-    "d"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 33,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'updated name'"
-      ]
-     },
-     "execution_count": 33,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "d.name = \"updated name\"\n",
-    "dataset_with_experiment_model.save(d)\n",
-    "dataset_with_experiment_model[0].name"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 35,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[{'id': 0,\n",
-       "  'name': 'updated name',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'},\n",
-       " {'id': 0,\n",
-       "  'name': 'test',\n",
-       "  'description': 'test description',\n",
-       "  'tags': 'tag1',\n",
-       "  'result': '0.5'}]"
-      ]
-     },
-     "execution_count": 35,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_with_experiment_model.load_as_dicts()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 36,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def get(self: Dataset, field_value: t.Any, field_name: str = \"_row_id\") -> t.Optional[BaseModelType]:\n",
-    "    \"\"\"Get an entry by field value.\n",
-    "    \n",
-    "    Args:\n",
-    "        field_value: The value to match\n",
-    "        field_name: The field to match against (default: \"_row_id\")\n",
-    "        \n",
-    "    Returns:\n",
-    "        The matching model instance or None if not found\n",
-    "    \"\"\"\n",
-    "    # Check if we need to load entries\n",
-    "    if not self._entries:\n",
-    "        self.load()\n",
-    "    \n",
-    "    # Search in local entries first\n",
-    "    for entry in self._entries:\n",
-    "        if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:\n",
-    "            return entry\n",
-    "    \n",
-    "    # If not found, try to get from backend\n",
-    "    if field_name == \"_row_id\":\n",
-    "        # Special case for row IDs\n",
-    "        for entry in self._entries:\n",
-    "            if hasattr(entry, \"_row_id\") and entry._row_id == field_value:\n",
-    "                return entry\n",
-    "    else:\n",
-    "        # Use backend to search\n",
-    "        return self._backend.get_entry_by_field(field_name, field_value, self.model)\n",
-    "    \n",
-    "    return None"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 37,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'36eeed65-2105-4900-b9bb-bb42ddc35820'"
-      ]
-     },
-     "execution_count": 37,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "d._row_id"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 38,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "ExperimentModel(id=0, name='updated name', description='test description', tags='tag1', result='0.5')"
-      ]
-     },
-     "execution_count": 38,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_instance = dataset_with_experiment_model.get(d._row_id)\n",
-    "dataset_instance"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 39,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def to_pandas(self: Dataset) -> \"pd.DataFrame\":\n",
-    "    \"\"\"Convert dataset to pandas DataFrame.\n",
-    "    \n",
-    "    Returns:\n",
-    "        pd.DataFrame: A DataFrame containing all entries\n",
-    "    \"\"\"\n",
-    "    # Make sure we have data\n",
-    "    if not self._entries:\n",
-    "        self.load()\n",
-    "    \n",
-    "    # Convert entries to dictionaries\n",
-    "    data = [entry.model_dump() for entry in self._entries]\n",
-    "    return pd.DataFrame(data)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/embedding/base.ipynb b/experimental/old_nbs/api/embedding/base.ipynb
deleted file mode 100644
index ffab22416..000000000
--- a/experimental/old_nbs/api/embedding/base.ipynb
+++ /dev/null
@@ -1,1150 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#| default_exp embedding.base"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Embeddings"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import typing as t\n",
-    "from abc import ABC, abstractmethod\n",
-    "\n",
-    "#TODO: Add support for other providers like HuggingFace, Cohere, etc.\n",
-    "#TODO: handle async calls properly and ensure that the client supports async if needed.\n",
-    "\n",
-    "class BaseEmbedding(ABC):\n",
-    "    @abstractmethod\n",
-    "    def embed_text(self, text: str, **kwargs: t.Any) -> t.List[float]:\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    async def aembed_text(self, text: str, **kwargs: t.Any) -> t.List[float]:\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def embed_document(self, documents: t.List[str], **kwargs: t.Any) -> t.List[t.List[float]]:\n",
-    "        pass\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    async def aembed_document(self, documents: t.List[str], **kwargs: t.Any) -> t.List[t.List[float]]:\n",
-    "        pass\n",
-    "\n",
-    "\n",
-    "class OpenAIEmbeddings(BaseEmbedding):\n",
-    "    def __init__(self, client: t.Any, model: str):\n",
-    "        self.client = client\n",
-    "        self.model = model\n",
-    "    \n",
-    "    def embed_text(self, text: str, **kwargs: t.Any) -> t.List[float]:\n",
-    "        return self.client.embeddings.create(input=text, model=self.model, **kwargs).data[0].embedding\n",
-    "    \n",
-    "    async def aembed_text(self, text: str, **kwargs: t.Any) -> t.List[float]:\n",
-    "        response = await self.client.embeddings.create(input=text, model=self.model, **kwargs)\n",
-    "        return response.data[0].embedding\n",
-    "    \n",
-    "    def embed_document(self, documents: t.List[str], **kwargs: t.Any) -> t.List[t.List[float]]:\n",
-    "        embeddings = self.client.embeddings.create(input=documents, model=self.model, **kwargs)\n",
-    "        return [embedding.embedding for embedding in embeddings.data]\n",
-    "    \n",
-    "    async def aembed_document(self, documents: t.List[str], **kwargs: t.Any) -> t.List[t.List[float]]:\n",
-    "        embeddings = await self.client.embeddings.create(input=documents, model=self.model, **kwargs)\n",
-    "        return [embedding.embedding for embedding in embeddings.data]\n",
-    "    \n",
-    "    \n",
-    "def ragas_embedding(provider: str, model: str, client: t.Any) -> BaseEmbedding:\n",
-    "    \"\"\"\n",
-    "    Factory function to create an embedding instance based on the provider.\n",
-    "    \n",
-    "    Args:\n",
-    "        provider (str): The name of the embedding provider (e.g., \"openai\").\n",
-    "        model (str): The model name to use for embeddings.\n",
-    "        **kwargs: Additional arguments for the provider's client.\n",
-    "    \n",
-    "    Returns:\n",
-    "        BaseEmbedding: An instance of the specified embedding provider.\n",
-    "    \"\"\"\n",
-    "    if provider.lower() == \"openai\":\n",
-    "        return OpenAIEmbeddings(client=client, model=model)\n",
-    "    \n",
-    "    raise ValueError(f\"Unsupported provider: {provider}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Example Usage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[-0.019184619188308716,\n",
-       " -0.025279032066464424,\n",
-       " -0.0017195191467180848,\n",
-       " 0.01884828321635723,\n",
-       " -0.033795066177845,\n",
-       " -0.01969585195183754,\n",
-       " -0.02094702236354351,\n",
-       " 0.051580529659986496,\n",
-       " -0.03212684020400047,\n",
-       " -0.030377890914678574,\n",
-       " -0.002145825419574976,\n",
-       " -0.028978731483221054,\n",
-       " -0.0024737531784921885,\n",
-       " -0.031481072306632996,\n",
-       " 0.010332250036299229,\n",
-       " 0.018606122583150864,\n",
-       " -0.04614533483982086,\n",
-       " 0.04146353527903557,\n",
-       " 0.0004418617463670671,\n",
-       " 0.04122137278318405,\n",
-       " 0.05367926508188248,\n",
-       " 0.0018733929609879851,\n",
-       " 0.0045674461871385574,\n",
-       " 0.010022819973528385,\n",
-       " 0.04786737635731697,\n",
-       " 0.0022013208363205194,\n",
-       " -0.009834472090005875,\n",
-       " 0.03847686946392059,\n",
-       " 0.00089213193859905,\n",
-       " -0.05211866647005081,\n",
-       " 0.051150016486644745,\n",
-       " -0.032557349652051926,\n",
-       " -0.014031948521733284,\n",
-       " -0.012632790021598339,\n",
-       " 0.013271828182041645,\n",
-       " 0.018565760925412178,\n",
-       " 0.0016068464610725641,\n",
-       " -0.0008185583865270019,\n",
-       " -0.012753871269524097,\n",
-       " -0.029705218970775604,\n",
-       " -0.004443001933395863,\n",
-       " -0.015323479659855366,\n",
-       " 0.025655729696154594,\n",
-       " 0.009107985533773899,\n",
-       " -0.03686245530843735,\n",
-       " 0.020328164100646973,\n",
-       " -0.04071014001965523,\n",
-       " -0.002621741034090519,\n",
-       " 0.03549019992351532,\n",
-       " 0.04851314052939415,\n",
-       " -0.03368743881583214,\n",
-       " -0.002441801130771637,\n",
-       " 0.017260776832699776,\n",
-       " 0.07598508894443512,\n",
-       " 0.0009232430020347238,\n",
-       " -0.04267434403300285,\n",
-       " 0.008381499908864498,\n",
-       " 0.0760388970375061,\n",
-       " -0.047275424003601074,\n",
-       " 0.015081318095326424,\n",
-       " 0.014247204177081585,\n",
-       " 0.024700535461306572,\n",
-       " 0.010197714902460575,\n",
-       " -0.000978738535195589,\n",
-       " 0.013789786025881767,\n",
-       " -0.010103541426360607,\n",
-       " -0.020704859867691994,\n",
-       " -0.001531170797534287,\n",
-       " -0.011717955581843853,\n",
-       " 0.04934725537896156,\n",
-       " 0.0010939337080344558,\n",
-       " 0.037831101566553116,\n",
-       " -0.019332608208060265,\n",
-       " 0.005855614319443703,\n",
-       " -0.046279869973659515,\n",
-       " -0.0045439028181135654,\n",
-       " -0.022359633818268776,\n",
-       " 0.008751469664275646,\n",
-       " -0.02657056413590908,\n",
-       " -0.05440575256943703,\n",
-       " -0.04423494264483452,\n",
-       " 0.019332608208060265,\n",
-       " -0.03091602772474289,\n",
-       " -0.06037908419966698,\n",
-       " -0.018888644874095917,\n",
-       " 0.004372371360659599,\n",
-       " -0.02389332838356495,\n",
-       " -0.012027384713292122,\n",
-       " -0.016601556912064552,\n",
-       " 0.0022013208363205194,\n",
-       " -0.00802498310804367,\n",
-       " 0.01529657281935215,\n",
-       " -0.014960236847400665,\n",
-       " 0.01245789509266615,\n",
-       " 0.014502819627523422,\n",
-       " -0.027687201276421547,\n",
-       " -0.022790145128965378,\n",
-       " 0.05666593089699745,\n",
-       " 0.061024848371744156,\n",
-       " -0.04929343983530998,\n",
-       " 0.014610446989536285,\n",
-       " -0.027323957532644272,\n",
-       " 0.013251648284494877,\n",
-       " -0.0205434188246727,\n",
-       " 0.0298666600137949,\n",
-       " 0.022507622838020325,\n",
-       " 0.00819987803697586,\n",
-       " -0.04068323224782944,\n",
-       " -0.026584018021821976,\n",
-       " 0.004533812869340181,\n",
-       " -0.12474039196968079,\n",
-       " 0.009417415596544743,\n",
-       " 0.031803958117961884,\n",
-       " -0.031077470630407333,\n",
-       " 0.005801800638437271,\n",
-       " 0.030835308134555817,\n",
-       " 0.05367926508188248,\n",
-       " -0.039553143084049225,\n",
-       " 0.02342245727777481,\n",
-       " -0.05375998839735985,\n",
-       " 0.00868420209735632,\n",
-       " -0.01152287982404232,\n",
-       " 0.019534409046173096,\n",
-       " -0.04184022918343544,\n",
-       " -0.043131761252880096,\n",
-       " -0.04297031834721565,\n",
-       " 0.005852250847965479,\n",
-       " 0.057526953518390656,\n",
-       " -0.031481072306632996,\n",
-       " 0.019911106675863266,\n",
-       " 0.03944551572203636,\n",
-       " 0.03982221335172653,\n",
-       " 0.01127399131655693,\n",
-       " -0.0002850449818652123,\n",
-       " -0.045553382486104965,\n",
-       " 0.0018666662508621812,\n",
-       " -0.040656328201293945,\n",
-       " -0.013446723110973835,\n",
-       " -0.049105092883110046,\n",
-       " 0.047275424003601074,\n",
-       " 0.056450676172971725,\n",
-       " -0.047248516231775284,\n",
-       " -0.010890567675232887,\n",
-       " -0.00996228028088808,\n",
-       " -0.005926244892179966,\n",
-       " -0.04119446501135826,\n",
-       " -0.008791829459369183,\n",
-       " 0.026086239144206047,\n",
-       " -0.009948826394975185,\n",
-       " -0.00625585438683629,\n",
-       " 0.030377890914678574,\n",
-       " 0.060648154467344284,\n",
-       " -0.051230739802122116,\n",
-       " 0.025776810944080353,\n",
-       " 0.00377705623395741,\n",
-       " -0.002621741034090519,\n",
-       " 0.024512186646461487,\n",
-       " -0.016816813498735428,\n",
-       " -0.02782173454761505,\n",
-       " 0.015054411254823208,\n",
-       " 0.05510533228516579,\n",
-       " 0.039580050855875015,\n",
-       " -0.04436947777867317,\n",
-       " -0.007897174917161465,\n",
-       " -0.008146064355969429,\n",
-       " 0.00850930716842413,\n",
-       " -0.011744862422347069,\n",
-       " 0.002426665974780917,\n",
-       " -0.04361608624458313,\n",
-       " -0.002248407807201147,\n",
-       " 0.023974047973752022,\n",
-       " 0.020933568477630615,\n",
-       " -0.0211219172924757,\n",
-       " -0.04509596526622772,\n",
-       " -0.0192249808460474,\n",
-       " 0.02634185552597046,\n",
-       " 0.023449363186955452,\n",
-       " -0.04958941787481308,\n",
-       " -0.01622486114501953,\n",
-       " -0.025238672271370888,\n",
-       " 0.02852131426334381,\n",
-       " 0.04541884735226631,\n",
-       " 0.0022921315394341946,\n",
-       " 0.019090445712208748,\n",
-       " -0.026584018021821976,\n",
-       " -0.011179816909134388,\n",
-       " -0.004473272245377302,\n",
-       " -0.006804082542657852,\n",
-       " -0.011913030408322811,\n",
-       " 0.0008563962182961404,\n",
-       " -0.03298785910010338,\n",
-       " 0.056235421448946,\n",
-       " 0.023476270958781242,\n",
-       " 0.0019675670191645622,\n",
-       " 0.004510269034653902,\n",
-       " -0.03659338504076004,\n",
-       " 0.0669981837272644,\n",
-       " 0.00536792678758502,\n",
-       " -0.021565880626440048,\n",
-       " 0.02427002415060997,\n",
-       " -0.00038993984344415367,\n",
-       " 0.012706783600151539,\n",
-       " -0.05136527121067047,\n",
-       " -0.031884677708148956,\n",
-       " -0.02342245727777481,\n",
-       " -0.04186713695526123,\n",
-       " -1.4254876077757217e-05,\n",
-       " 0.07087277621030807,\n",
-       " -0.00837477296590805,\n",
-       " -0.05246845632791519,\n",
-       " 0.058603230863809586,\n",
-       " -0.014677714556455612,\n",
-       " -0.0541904978454113,\n",
-       " -0.0020482877735048532,\n",
-       " -0.04932034760713577,\n",
-       " -0.017879635095596313,\n",
-       " 0.041275184601545334,\n",
-       " 0.02229236625134945,\n",
-       " -0.011226904578506947,\n",
-       " -0.03161560744047165,\n",
-       " -0.07937535643577576,\n",
-       " 0.07157235592603683,\n",
-       " 0.08513343334197998,\n",
-       " -0.04122137278318405,\n",
-       " 0.030889121815562248,\n",
-       " -0.013339095748960972,\n",
-       " -0.008536214008927345,\n",
-       " -0.008213330991566181,\n",
-       " 0.04996611550450325,\n",
-       " 0.01458354014903307,\n",
-       " 0.020879754796624184,\n",
-       " 0.01826978474855423,\n",
-       " 0.02429693192243576,\n",
-       " -0.021431345492601395,\n",
-       " -0.010500418022274971,\n",
-       " -0.004325284156948328,\n",
-       " 0.036727920174598694,\n",
-       " -0.021350625902414322,\n",
-       " -0.005657176021486521,\n",
-       " -0.0071572354063391685,\n",
-       " -0.0387459360063076,\n",
-       " -0.0011199996806681156,\n",
-       " -0.006037235725671053,\n",
-       " 0.034252483397722244,\n",
-       " 0.04563410207629204,\n",
-       " -0.016103779897093773,\n",
-       " -0.042728159576654434,\n",
-       " -0.022413447499275208,\n",
-       " 0.011119276285171509,\n",
-       " 0.04076395556330681,\n",
-       " 0.017960356548428535,\n",
-       " 0.02724323607981205,\n",
-       " 0.005418376997113228,\n",
-       " -0.02036852389574051,\n",
-       " 0.017166603356599808,\n",
-       " -0.01021116878837347,\n",
-       " 0.006659457925707102,\n",
-       " -0.027458492666482925,\n",
-       " 0.042728159576654434,\n",
-       " -0.02106810361146927,\n",
-       " -0.048728395253419876,\n",
-       " -0.062101125717163086,\n",
-       " -0.035301852971315384,\n",
-       " -0.02779482863843441,\n",
-       " 0.012632790021598339,\n",
-       " -0.027404678985476494,\n",
-       " 0.004089849069714546,\n",
-       " -0.013897414319217205,\n",
-       " -0.016615010797977448,\n",
-       " -0.013164200820028782,\n",
-       " 0.04385824874043465,\n",
-       " -0.0075810193084180355,\n",
-       " 0.03266497701406479,\n",
-       " -0.004355554468929768,\n",
-       " -0.025803716853260994,\n",
-       " 0.0032876869663596153,\n",
-       " -0.005179578438401222,\n",
-       " -0.017328044399619102,\n",
-       " -0.01981693133711815,\n",
-       " 0.0369969867169857,\n",
-       " -0.025763357058167458,\n",
-       " -0.0014664260670542717,\n",
-       " 0.010513870976865292,\n",
-       " 0.033983416855335236,\n",
-       " -0.05131145939230919,\n",
-       " 0.008832190185785294,\n",
-       " 0.027081795036792755,\n",
-       " -0.01144888624548912,\n",
-       " 0.007722280453890562,\n",
-       " -0.02479470893740654,\n",
-       " 0.03277260437607765,\n",
-       " 0.02774101495742798,\n",
-       " 0.016278674826025963,\n",
-       " -0.02039542980492115,\n",
-       " 0.025911344215273857,\n",
-       " -0.002879038453102112,\n",
-       " -0.0013175972271710634,\n",
-       " -0.041651882231235504,\n",
-       " 0.038153983652591705,\n",
-       " 0.0025460654869675636,\n",
-       " 0.07695373892784119,\n",
-       " 0.0007592791225761175,\n",
-       " 0.04294341430068016,\n",
-       " -0.005845523905009031,\n",
-       " -0.001709428965114057,\n",
-       " 0.04154425486922264,\n",
-       " 0.015901979058980942,\n",
-       " -0.01701861433684826,\n",
-       " 0.05951806530356407,\n",
-       " -0.0013714110245928168,\n",
-       " -0.008959997445344925,\n",
-       " 0.009585583582520485,\n",
-       " 0.05666593089699745,\n",
-       " -0.02784864231944084,\n",
-       " 0.01347362995147705,\n",
-       " -0.045849356800317764,\n",
-       " 0.019857292994856834,\n",
-       " -0.019332608208060265,\n",
-       " 0.0009694892796687782,\n",
-       " -0.04003746807575226,\n",
-       " 0.023449363186955452,\n",
-       " -0.06199349835515022,\n",
-       " 0.009477955289185047,\n",
-       " -0.015713630244135857,\n",
-       " -0.015162038616836071,\n",
-       " -0.00862366147339344,\n",
-       " 0.045553382486104965,\n",
-       " 0.021538974717259407,\n",
-       " 0.0020180174615234137,\n",
-       " 0.013756153173744678,\n",
-       " 0.014664260670542717,\n",
-       " -0.02706834115087986,\n",
-       " -0.004664984066039324,\n",
-       " 0.010830027051270008,\n",
-       " 0.007224502973258495,\n",
-       " -0.016951346769928932,\n",
-       " -0.04372371360659599,\n",
-       " 0.05427121743559837,\n",
-       " 0.012767324224114418,\n",
-       " 0.04579554498195648,\n",
-       " -0.02657056413590908,\n",
-       " -0.027902456000447273,\n",
-       " 0.02179458923637867,\n",
-       " -0.03651266545057297,\n",
-       " -0.011987023986876011,\n",
-       " -0.0041941129602491856,\n",
-       " 0.033929601311683655,\n",
-       " -0.02712215483188629,\n",
-       " 0.004288287367671728,\n",
-       " 0.004399278201162815,\n",
-       " -0.017381858080625534,\n",
-       " -0.005243482068181038,\n",
-       " 0.016413209959864616,\n",
-       " -0.02464671991765499,\n",
-       " -0.01762402057647705,\n",
-       " -0.009868105873465538,\n",
-       " 0.0716799795627594,\n",
-       " -0.024727441370487213,\n",
-       " -0.019534409046173096,\n",
-       " 0.021256450563669205,\n",
-       " -0.006609007250517607,\n",
-       " -0.006915073376148939,\n",
-       " 0.00413020933046937,\n",
-       " -0.01210810523480177,\n",
-       " 0.03384888172149658,\n",
-       " 0.030431704595685005,\n",
-       " -0.007258136291056871,\n",
-       " -0.04081776738166809,\n",
-       " -0.007345583755522966,\n",
-       " 0.04385824874043465,\n",
-       " 0.013298735953867435,\n",
-       " 0.01475843507796526,\n",
-       " 0.032153744250535965,\n",
-       " -0.0036324316170066595,\n",
-       " -0.03479062393307686,\n",
-       " -0.015175491571426392,\n",
-       " 0.0117986761033535,\n",
-       " -0.00017373869195580482,\n",
-       " 0.059625692665576935,\n",
-       " -0.009249246679246426,\n",
-       " 0.04036035016179085,\n",
-       " 0.03371434658765793,\n",
-       " -0.019736211746931076,\n",
-       " -0.026610923931002617,\n",
-       " 0.010325523093342781,\n",
-       " -0.005855614319443703,\n",
-       " 0.0206914059817791,\n",
-       " 0.011381618678569794,\n",
-       " -0.01701861433684826,\n",
-       " 0.008576574735343456,\n",
-       " 0.03352599963545799,\n",
-       " -0.011563240550458431,\n",
-       " 0.004426185041666031,\n",
-       " 0.00951158907264471,\n",
-       " 0.007809727918356657,\n",
-       " -0.01757020689547062,\n",
-       " -0.021808043122291565,\n",
-       " -0.015188945457339287,\n",
-       " -0.022682517766952515,\n",
-       " -0.05763458088040352,\n",
-       " 0.04716779664158821,\n",
-       " -0.023664619773626328,\n",
-       " 0.007527205627411604,\n",
-       " 0.011401799507439137,\n",
-       " -0.02022053487598896,\n",
-       " -0.03347218409180641,\n",
-       " 0.012229186482727528,\n",
-       " 0.05112311244010925,\n",
-       " -0.0036391583271324635,\n",
-       " -0.023503176867961884,\n",
-       " 0.004083122126758099,\n",
-       " -0.052280109375715256,\n",
-       " 0.033956509083509445,\n",
-       " 0.03191158547997475,\n",
-       " -0.025036871433258057,\n",
-       " 0.00199615559540689,\n",
-       " -0.023261016234755516,\n",
-       " -0.03928407281637192,\n",
-       " -0.0007407806115224957,\n",
-       " -0.0041201189160346985,\n",
-       " 0.00614150008186698,\n",
-       " 0.019036632031202316,\n",
-       " -0.014153029769659042,\n",
-       " 0.025911344215273857,\n",
-       " -0.032557349652051926,\n",
-       " 0.04006437584757805,\n",
-       " 0.03062005341053009,\n",
-       " -0.028063897043466568,\n",
-       " 0.0187944695353508,\n",
-       " -0.08260418474674225,\n",
-       " -0.0015959155280143023,\n",
-       " -0.03573236241936684,\n",
-       " -0.00360216130502522,\n",
-       " 0.03624359518289566,\n",
-       " 0.02631494775414467,\n",
-       " -0.04617224261164665,\n",
-       " 0.002162642078474164,\n",
-       " -0.006302941590547562,\n",
-       " 0.058603230863809586,\n",
-       " 0.02322065457701683,\n",
-       " -0.0025494287256151438,\n",
-       " 0.009013812057673931,\n",
-       " 0.008832190185785294,\n",
-       " 0.0022988582495599985,\n",
-       " -0.009350148029625416,\n",
-       " -0.05384070798754692,\n",
-       " -0.003153152298182249,\n",
-       " -0.013857053592801094,\n",
-       " -0.040548697113990784,\n",
-       " 0.017812367528676987,\n",
-       " 0.0035248040221631527,\n",
-       " -0.04358917847275734,\n",
-       " 0.013177654705941677,\n",
-       " 0.013978134840726852,\n",
-       " 0.03134653717279434,\n",
-       " 0.015175491571426392,\n",
-       " -0.0002869368763640523,\n",
-       " 0.01687062717974186,\n",
-       " 0.01992456056177616,\n",
-       " 0.026449482887983322,\n",
-       " -0.0039048639591783285,\n",
-       " 0.0231668408960104,\n",
-       " -0.04773284122347832,\n",
-       " 0.052172478288412094,\n",
-       " 0.006410568952560425,\n",
-       " -0.0035718909930437803,\n",
-       " -0.02284395880997181,\n",
-       " 0.023328281939029694,\n",
-       " -0.016305582597851753,\n",
-       " -0.02229236625134945,\n",
-       " -0.012525161728262901,\n",
-       " 0.025077231228351593,\n",
-       " 0.008226784877479076,\n",
-       " -0.023758793249726295,\n",
-       " -0.020314710214734077,\n",
-       " -0.018202519044280052,\n",
-       " -0.05445956811308861,\n",
-       " 0.01547146774828434,\n",
-       " -0.044154223054647446,\n",
-       " 0.0001709008647594601,\n",
-       " 0.027525758370757103,\n",
-       " 0.007002520840615034,\n",
-       " 0.04143662750720978,\n",
-       " 0.02919398620724678,\n",
-       " -0.003316275542601943,\n",
-       " 0.009773931466042995,\n",
-       " -0.07211049646139145,\n",
-       " 0.026732005178928375,\n",
-       " -0.004042761866003275,\n",
-       " -0.010231348685920238,\n",
-       " -0.034333206713199615,\n",
-       " 0.06193968653678894,\n",
-       " 0.0640922337770462,\n",
-       " -0.015484921634197235,\n",
-       " -0.009706663899123669,\n",
-       " -0.008280598558485508,\n",
-       " 0.005670629441738129,\n",
-       " -0.013251648284494877,\n",
-       " -0.002973212394863367,\n",
-       " -0.02879038266837597,\n",
-       " -0.007143781986087561,\n",
-       " -0.04157116264104843,\n",
-       " -0.0066998181864619255,\n",
-       " 0.01987074688076973,\n",
-       " 0.06199349835515022,\n",
-       " -0.006968887057155371,\n",
-       " -0.04687182232737541,\n",
-       " -0.014193389564752579,\n",
-       " 0.007399397436529398,\n",
-       " -0.03374125435948372,\n",
-       " -0.043481551110744476,\n",
-       " -0.008139337413012981,\n",
-       " 0.007634832989424467,\n",
-       " -0.005532731302082539,\n",
-       " 0.012087925337255001,\n",
-       " -0.003134653903543949,\n",
-       " 0.009518316015601158,\n",
-       " 0.028252245858311653,\n",
-       " -0.012000477872788906,\n",
-       " -0.030835308134555817,\n",
-       " 0.026624377816915512,\n",
-       " 0.032557349652051926,\n",
-       " -0.006575373932719231,\n",
-       " -0.00798462238162756,\n",
-       " -0.0033515908289700747,\n",
-       " 0.019386421889066696,\n",
-       " -0.05160743370652199,\n",
-       " -0.022104019299149513,\n",
-       " 0.008516034111380577,\n",
-       " 0.027875548228621483,\n",
-       " 0.019628584384918213,\n",
-       " 0.004991230089217424,\n",
-       " 0.028655849397182465,\n",
-       " 0.01359471119940281,\n",
-       " -0.007782821077853441,\n",
-       " -0.01109909638762474,\n",
-       " -0.0005763962399214506,\n",
-       " 0.011953390203416348,\n",
-       " -0.004738977644592524,\n",
-       " -0.022790145128965378,\n",
-       " 0.007096694782376289,\n",
-       " 0.02948996238410473,\n",
-       " -0.006481199525296688,\n",
-       " -0.0007987986318767071,\n",
-       " -0.011475793085992336,\n",
-       " -0.00785008817911148,\n",
-       " 0.04687182232737541,\n",
-       " 0.006397115532308817,\n",
-       " -0.002424984471872449,\n",
-       " 0.025157952681183815,\n",
-       " 0.00809897668659687,\n",
-       " -0.016332488507032394,\n",
-       " -0.013897414319217205,\n",
-       " -0.012081198394298553,\n",
-       " 0.03387578949332237,\n",
-       " 0.0027613206766545773,\n",
-       " -0.02149861305952072,\n",
-       " -0.006656094454228878,\n",
-       " 0.015148584730923176,\n",
-       " 0.06586809456348419,\n",
-       " 0.004765884950757027,\n",
-       " -0.010439877398312092,\n",
-       " 0.013762879185378551,\n",
-       " 0.027956269681453705,\n",
-       " -9.002249862533063e-05,\n",
-       " 0.03177705034613609,\n",
-       " 0.007190869189798832,\n",
-       " -0.0212699044495821,\n",
-       " -0.03772347420454025,\n",
-       " -0.038530681282281876,\n",
-       " -0.03616287559270859,\n",
-       " -0.024014407768845558,\n",
-       " -0.026032425463199615,\n",
-       " -0.06387697905302048,\n",
-       " 0.021175730973482132,\n",
-       " -0.007587745785713196,\n",
-       " 0.033929601311683655,\n",
-       " 0.026355309411883354,\n",
-       " 0.0013167564757168293,\n",
-       " -0.004880239255726337,\n",
-       " -0.004715434275567532,\n",
-       " -0.0167495459318161,\n",
-       " -0.0015866663306951523,\n",
-       " 0.029705218970775604,\n",
-       " -0.04119446501135826,\n",
-       " 0.048755303025245667,\n",
-       " 0.02182149700820446,\n",
-       " 0.014368284493684769,\n",
-       " 0.024700535461306572,\n",
-       " -0.032207559794187546,\n",
-       " 0.012188825756311417,\n",
-       " 0.003978857770562172,\n",
-       " 0.009249246679246426,\n",
-       " 0.04264743626117706,\n",
-       " 0.0012848045444115996,\n",
-       " -0.0352480411529541,\n",
-       " -0.018000716343522072,\n",
-       " -0.02034161612391472,\n",
-       " -0.029382335022091866,\n",
-       " 0.03702389448881149,\n",
-       " 0.011785222217440605,\n",
-       " 0.006400479003787041,\n",
-       " -0.022238552570343018,\n",
-       " -0.04845932871103287,\n",
-       " 0.027552666142582893,\n",
-       " -0.014166482724249363,\n",
-       " -0.01102510280907154,\n",
-       " -0.0018464860040694475,\n",
-       " 0.0025527921970933676,\n",
-       " -0.04958941787481308,\n",
-       " -0.024956149980425835,\n",
-       " 0.03772347420454025,\n",
-       " -0.021565880626440048,\n",
-       " -0.05410977825522423,\n",
-       " -0.004147026222199202,\n",
-       " 0.03053933195769787,\n",
-       " -0.011354711838066578,\n",
-       " 0.011778495274484158,\n",
-       " -0.015202398411929607,\n",
-       " -0.021888762712478638,\n",
-       " -0.008253691717982292,\n",
-       " -0.042378369718790054,\n",
-       " 0.0026671465020626783,\n",
-       " 0.028225338086485863,\n",
-       " -0.00250906846486032,\n",
-       " 0.016789905726909637,\n",
-       " -0.018606122583150864,\n",
-       " 0.0023072666954249144,\n",
-       " -0.02369152568280697,\n",
-       " 0.01987074688076973,\n",
-       " 0.012901858426630497,\n",
-       " 0.014960236847400665,\n",
-       " 0.0059800585731863976,\n",
-       " -0.0016825221246108413,\n",
-       " -0.006575373932719231,\n",
-       " -0.005008046980947256,\n",
-       " -0.008657295256853104,\n",
-       " -0.01654774323105812,\n",
-       " 0.00396204087883234,\n",
-       " -0.02334173582494259,\n",
-       " 0.04958941787481308,\n",
-       " 0.020852847024798393,\n",
-       " 0.0028454046696424484,\n",
-       " -0.01757020689547062,\n",
-       " 0.05203794687986374,\n",
-       " 0.014260657131671906,\n",
-       " 0.013083480298519135,\n",
-       " 0.03137344494462013,\n",
-       " 0.009531769901514053,\n",
-       " -0.013339095748960972,\n",
-       " 0.026705099269747734,\n",
-       " 0.004022581502795219,\n",
-       " 0.0033717709593474865,\n",
-       " 0.0017573569202795625,\n",
-       " 0.012908585369586945,\n",
-       " -0.020489605143666267,\n",
-       " -0.028117710724473,\n",
-       " -0.01844467967748642,\n",
-       " -0.021027741953730583,\n",
-       " 0.02234617993235588,\n",
-       " -0.004634713754057884,\n",
-       " 0.07496262341737747,\n",
-       " -0.016278674826025963,\n",
-       " -0.006239037495106459,\n",
-       " -0.009074351750314236,\n",
-       " 0.010049727745354176,\n",
-       " 0.019467143341898918,\n",
-       " 0.014193389564752579,\n",
-       " -0.008072069846093655,\n",
-       " -0.019561316817998886,\n",
-       " 0.00862366147339344,\n",
-       " -0.014314470812678337,\n",
-       " 0.04251290112733841,\n",
-       " 0.0033566358033567667,\n",
-       " 0.03659338504076004,\n",
-       " 0.0019103899830952287,\n",
-       " -0.030108822509646416,\n",
-       " -0.007305223494768143,\n",
-       " 0.0018733929609879851,\n",
-       " -0.024431465193629265,\n",
-       " 0.01335927564650774,\n",
-       " 0.006326484959572554,\n",
-       " -0.04105992987751961,\n",
-       " -0.03629740700125694,\n",
-       " -0.0020953749772161245,\n",
-       " 0.028924917802214622,\n",
-       " 0.029785938560962677,\n",
-       " 0.01069549284875393,\n",
-       " -0.003615614725276828,\n",
-       " -0.0005154352984391153,\n",
-       " -0.02922089397907257,\n",
-       " -0.021808043122291565,\n",
-       " -0.0036324316170066595,\n",
-       " 0.04243218153715134,\n",
-       " -0.010480238124728203,\n",
-       " -0.03156179562211037,\n",
-       " 0.022709423676133156,\n",
-       " 0.004443001933395863,\n",
-       " -0.01286149863153696,\n",
-       " -0.03826161101460457,\n",
-       " 0.024660173803567886,\n",
-       " -0.011004921980202198,\n",
-       " -0.006393752060830593,\n",
-       " 0.02114882320165634,\n",
-       " 0.026906900107860565,\n",
-       " -0.023462817072868347,\n",
-       " -0.024135489016771317,\n",
-       " 0.03446773812174797,\n",
-       " 0.028036991134285927,\n",
-       " 0.014341377653181553,\n",
-       " -0.04700635373592377,\n",
-       " 0.005378016736358404,\n",
-       " -0.02914017252624035,\n",
-       " 0.0093232411891222,\n",
-       " -0.05881848558783531,\n",
-       " -0.0029210804495960474,\n",
-       " -0.029678311198949814,\n",
-       " -0.060701966285705566,\n",
-       " -0.006797355599701405,\n",
-       " 0.002322401851415634,\n",
-       " -0.034306298941373825,\n",
-       " 0.0004843242058996111,\n",
-       " -0.023651165887713432,\n",
-       " 0.01073585357517004,\n",
-       " -0.021310264244675636,\n",
-       " -0.035005878657102585,\n",
-       " 0.0028050444088876247,\n",
-       " -0.01596924476325512,\n",
-       " 0.03126581758260727,\n",
-       " 0.018256332725286484,\n",
-       " 0.0285482220351696,\n",
-       " -0.01844467967748642,\n",
-       " 0.013688885606825352,\n",
-       " 0.02581717073917389,\n",
-       " 0.0167495459318161,\n",
-       " -0.0010073271114379168,\n",
-       " -0.023826060816645622,\n",
-       " -0.01404540240764618,\n",
-       " 0.015054411254823208,\n",
-       " -0.01493333000689745,\n",
-       " -0.022978492081165314,\n",
-       " 0.02494269609451294,\n",
-       " 0.04407350346446037,\n",
-       " 0.022938132286071777,\n",
-       " -0.016655370593070984,\n",
-       " 0.012807684950530529,\n",
-       " 0.001075435196980834,\n",
-       " 0.001704383990727365,\n",
-       " -0.016386302188038826,\n",
-       " -7.651649502804503e-05,\n",
-       " 0.011771769262850285,\n",
-       " 0.01046005729585886,\n",
-       " -0.028575127944350243,\n",
-       " -0.003598797833546996,\n",
-       " 0.004406005144119263,\n",
-       " -0.012377174571156502,\n",
-       " 0.017704740166664124,\n",
-       " -0.0015740536618977785,\n",
-       " -0.017112787812948227,\n",
-       " 0.021565880626440048,\n",
-       " -0.01887519098818302,\n",
-       " 0.030862214043736458,\n",
-       " 0.00434210104867816,\n",
-       " 0.05147290229797363,\n",
-       " -0.020449243485927582,\n",
-       " 0.006454292684793472,\n",
-       " 0.011926483362913132,\n",
-       " 0.0012721918756142259,\n",
-       " -0.001787627232261002,\n",
-       " 0.003323002252727747,\n",
-       " 0.04606461524963379,\n",
-       " -0.003995674662292004,\n",
-       " 0.01133453194051981,\n",
-       " 0.0022013208363205194,\n",
-       " 0.0026419213972985744,\n",
-       " 0.0064273858442902565,\n",
-       " -0.04157116264104843,\n",
-       " 0.022332727909088135,\n",
-       " -0.042324554175138474,\n",
-       " -0.018431227654218674,\n",
-       " -0.006249127443879843,\n",
-       " 0.009444322437047958,\n",
-       " -0.024108583107590675,\n",
-       " -0.0015706903068348765,\n",
-       " 0.01404540240764618,\n",
-       " -0.017812367528676987,\n",
-       " 0.0015967563958838582,\n",
-       " 0.011516153812408447,\n",
-       " 0.022211646661162376,\n",
-       " -0.04229764640331268,\n",
-       " -0.024175850674510002,\n",
-       " -0.046279869973659515,\n",
-       " -0.01168432179838419,\n",
-       " 0.005357836373150349,\n",
-       " 0.005263662431389093,\n",
-       " 0.044907618314027786,\n",
-       " -0.01824287883937359,\n",
-       " -0.032207559794187546,\n",
-       " 0.010641679167747498,\n",
-       " 0.003783782944083214,\n",
-       " 0.004570809658616781,\n",
-       " -0.04751758649945259,\n",
-       " 0.02071831375360489,\n",
-       " 0.04009127989411354,\n",
-       " 0.004762521479278803,\n",
-       " -0.026678191497921944,\n",
-       " -0.014395191334187984,\n",
-       " 0.008838917128741741,\n",
-       " 0.006434112787246704,\n",
-       " -0.008267145603895187,\n",
-       " 0.021525520831346512,\n",
-       " 0.03406413644552231,\n",
-       " -0.012101378291845322,\n",
-       " -0.012356993742287159,\n",
-       " 0.005690809339284897,\n",
-       " -0.03982221335172653,\n",
-       " 0.006400479003787041,\n",
-       " 0.0035483473911881447,\n",
-       " 0.02304575964808464,\n",
-       " -0.00011897894728463143,\n",
-       " 0.02071831375360489,\n",
-       " 0.008327685296535492,\n",
-       " -0.018552307039499283,\n",
-       " -0.014206843450665474,\n",
-       " 0.046898726373910904,\n",
-       " 0.0218484029173851,\n",
-       " -0.023974047973752022,\n",
-       " 0.014287563972175121,\n",
-       " 0.03376815840601921,\n",
-       " -0.003514713840559125,\n",
-       " -0.018565760925412178,\n",
-       " 0.0023139934055507183,\n",
-       " -0.006820899434387684,\n",
-       " -0.006615734193474054,\n",
-       " 0.006568646989762783,\n",
-       " 0.02922089397907257,\n",
-       " 0.00862366147339344,\n",
-       " -0.01687062717974186,\n",
-       " -0.03522113338112831,\n",
-       " -0.010668586008250713,\n",
-       " 0.0003584083169698715,\n",
-       " -0.0030942936427891254,\n",
-       " 0.0010552549501881003,\n",
-       " -0.0161710474640131,\n",
-       " 0.02601897343993187,\n",
-       " -0.008072069846093655,\n",
-       " 0.021538974717259407,\n",
-       " -0.02456600032746792,\n",
-       " -0.0029093085322529078,\n",
-       " 0.012942219153046608,\n",
-       " -0.043454643338918686,\n",
-       " -0.012854771688580513,\n",
-       " 0.026207320392131805,\n",
-       " -0.006733451969921589,\n",
-       " -0.03209993243217468,\n",
-       " 0.016063420102000237,\n",
-       " -0.026032425463199615,\n",
-       " -0.012195552699267864,\n",
-       " -0.002974894130602479,\n",
-       " -0.01949404925107956,\n",
-       " -0.005391470156610012,\n",
-       " 0.019655490294098854,\n",
-       " 0.018485041335225105,\n",
-       " 0.017139695584774017,\n",
-       " 0.033283837139606476,\n",
-       " -0.014731528237462044,\n",
-       " -0.0006108707166276872,\n",
-       " -0.012377174571156502,\n",
-       " 0.0495356023311615,\n",
-       " 0.050100646913051605,\n",
-       " 0.0015606002416461706,\n",
-       " -0.00031111104181036353,\n",
-       " 0.001344504184089601,\n",
-       " -0.02948996238410473,\n",
-       " 0.020758673548698425,\n",
-       " 0.04474617540836334,\n",
-       " -0.05475554242730141,\n",
-       " 0.02784864231944084,\n",
-       " -0.006649367976933718,\n",
-       " -0.007708827033638954,\n",
-       " 0.022790145128965378,\n",
-       " 0.04264743626117706,\n",
-       " 0.010338976047933102,\n",
-       " 0.006767085287719965,\n",
-       " -0.036028340458869934,\n",
-       " -0.026032425463199615,\n",
-       " 0.01494678296148777,\n",
-       " 0.02324756234884262,\n",
-       " 0.01347362995147705,\n",
-       " 0.008246964775025845,\n",
-       " -0.014341377653181553,\n",
-       " 0.003151470795273781,\n",
-       " -0.0016581377713009715,\n",
-       " -0.00967303104698658,\n",
-       " 0.006259217858314514,\n",
-       " -0.02124299854040146,\n",
-       " -0.010675312951207161,\n",
-       " 0.027202876284718513,\n",
-       " 0.014879516325891018,\n",
-       " 0.009269427508115768,\n",
-       " 0.010675312951207161,\n",
-       " -0.007937535643577576,\n",
-       " 0.02121609076857567,\n",
-       " 0.02779482863843441,\n",
-       " 0.018135251477360725,\n",
-       " -0.007567565888166428,\n",
-       " -0.0042714704759418964,\n",
-       " -0.002071831375360489,\n",
-       " -0.006245764438062906,\n",
-       " 0.0018363959388807416,\n",
-       " -0.014650807715952396,\n",
-       " -0.0521455742418766,\n",
-       " 0.02922089397907257,\n",
-       " 0.024485278874635696,\n",
-       " 0.047975003719329834,\n",
-       " 0.009081078693270683,\n",
-       " 0.015592548996210098,\n",
-       " 0.022238552570343018,\n",
-       " -0.0061784968711435795,\n",
-       " 0.006121319718658924,\n",
-       " -0.01894245855510235,\n",
-       " -0.04353536665439606,\n",
-       " -0.016937894746661186,\n",
-       " 0.0056975362822413445,\n",
-       " -0.004089849069714546,\n",
-       " -0.009121439419686794,\n",
-       " -0.032853323966264725,\n",
-       " 0.0556434690952301,\n",
-       " 0.006935253739356995,\n",
-       " -0.017435671761631966,\n",
-       " 0.029086358845233917,\n",
-       " 0.029624497517943382,\n",
-       " -0.016036512330174446,\n",
-       " 0.01809488981962204,\n",
-       " 0.007897174917161465,\n",
-       " -0.013453450053930283,\n",
-       " -0.051580529659986496,\n",
-       " 0.030512424185872078,\n",
-       " 0.0027512304950505495,\n",
-       " -0.031104376539587975,\n",
-       " -0.03099674917757511,\n",
-       " 0.03879975154995918,\n",
-       " 0.0193729680031538,\n",
-       " 0.00539819709956646,\n",
-       " 0.06226256862282753,\n",
-       " 0.00551255140453577,\n",
-       " 0.017906542867422104,\n",
-       " -0.004089849069714546,\n",
-       " -0.015229305252432823,\n",
-       " -0.0192249808460474,\n",
-       " -0.023651165887713432,\n",
-       " -0.002043242799118161,\n",
-       " 0.0007563361432403326,\n",
-       " 0.007587745785713196,\n",
-       " -0.010830027051270008,\n",
-       " 0.008246964775025845,\n",
-       " 0.044127315282821655,\n",
-       " -0.008919637650251389,\n",
-       " -0.005472190678119659,\n",
-       " 0.012404081411659718,\n",
-       " -0.01666882447898388,\n",
-       " -0.016426661983132362,\n",
-       " -0.02474089525640011,\n",
-       " -0.012195552699267864,\n",
-       " -0.0016488884575664997,\n",
-       " -0.004607806913554668,\n",
-       " -0.01870029605925083,\n",
-       " -0.013830146752297878,\n",
-       " 0.009713390842080116,\n",
-       " 0.015632908791303635,\n",
-       " -0.0273912250995636,\n",
-       " 0.0006550148827955127,\n",
-       " 0.03656647726893425,\n",
-       " -0.01140852551907301,\n",
-       " 0.0023745340295135975,\n",
-       " -0.017287682741880417,\n",
-       " -0.035328760743141174,\n",
-       " 0.025884438306093216,\n",
-       " 0.04052179306745529,\n",
-       " -0.006302941590547562,\n",
-       " 0.023624258115887642,\n",
-       " 0.02266906388103962,\n",
-       " 0.02584407851099968,\n",
-       " -0.005145944654941559,\n",
-       " -0.005293932743370533,\n",
-       " 0.001347026671282947,\n",
-       " 0.01459699310362339,\n",
-       " 0.006010328885167837,\n",
-       " -0.016184501349925995,\n",
-       " -0.014475912787020206,\n",
-       " 0.007305223494768143,\n",
-       " -0.006706545129418373,\n",
-       " -0.02092011459171772,\n",
-       " 0.03452155366539955,\n",
-       " 0.03976839780807495,\n",
-       " -0.003048888174816966,\n",
-       " -0.025938251987099648,\n",
-       " -0.011354711838066578,\n",
-       " -0.02129681222140789,\n",
-       " -0.0167495459318161,\n",
-       " ...]"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#| eval: false\n",
-    "\n",
-    "## change to this design\n",
-    "from openai import OpenAI\n",
-    "embedding_model = ragas_embedding(provider=\"openai\", model=\"text-embedding-3-small\", client=OpenAI())\n",
-    "embedding_model.embed_text(\"Hello, world!\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/exceptions.ipynb b/experimental/old_nbs/api/exceptions.ipynb
deleted file mode 100644
index a33d7ae68..000000000
--- a/experimental/old_nbs/api/exceptions.ipynb
+++ /dev/null
@@ -1,118 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Exceptions\n",
-    "\n",
-    "> All the exceptions specific to the `notion_annotator` project."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp exceptions"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class RagasError(Exception):\n",
-    "    \"\"\"Base class for all Ragas-related exceptions.\"\"\"\n",
-    "    pass"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "class ValidationError(Exception):\n",
-    "    \"\"\"Raised when field validation fails.\"\"\"\n",
-    "\n",
-    "    pass\n",
-    "\n",
-    "\n",
-    "class DuplicateError(Exception):\n",
-    "    \"\"\"Raised when multiple items are found but only one was expected.\"\"\"\n",
-    "\n",
-    "    pass\n",
-    "\n",
-    "\n",
-    "class NotFoundError(Exception):\n",
-    "    \"\"\"Raised when an item is not found.\"\"\"\n",
-    "\n",
-    "    pass"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "class ResourceNotFoundError(RagasError):\n",
-    "    \"\"\"Exception raised when a requested resource doesn't exist.\"\"\"\n",
-    "    pass\n",
-    "\n",
-    "class ProjectNotFoundError(ResourceNotFoundError):\n",
-    "    \"\"\"Exception raised when a project doesn't exist.\"\"\"\n",
-    "    pass\n",
-    "\n",
-    "class DatasetNotFoundError(ResourceNotFoundError):\n",
-    "    \"\"\"Exception raised when a dataset doesn't exist.\"\"\"\n",
-    "    pass\n",
-    "\n",
-    "class ExperimentNotFoundError(ResourceNotFoundError):\n",
-    "    \"\"\"Exception raised when an experiment doesn't exist.\"\"\"\n",
-    "    pass"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "class DuplicateResourceError(RagasError):\n",
-    "    \"\"\"Exception raised when multiple resources exist with the same identifier.\"\"\"\n",
-    "    pass\n",
-    "\n",
-    "class DuplicateProjectError(DuplicateResourceError):\n",
-    "    \"\"\"Exception raised when multiple projects exist with the same name.\"\"\"\n",
-    "    pass\n",
-    "\n",
-    "class DuplicateDatasetError(DuplicateResourceError):\n",
-    "    \"\"\"Exception raised when multiple datasets exist with the same name.\"\"\"\n",
-    "    pass\n",
-    "\n",
-    "class DuplicateExperimentError(DuplicateResourceError):\n",
-    "    \"\"\"Exception raised when multiple experiments exist with the same name.\"\"\"\n",
-    "    pass"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/experiment.ipynb b/experimental/old_nbs/api/experiment.ipynb
deleted file mode 100644
index f4be56bae..000000000
--- a/experimental/old_nbs/api/experiment.ipynb
+++ /dev/null
@@ -1,242 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Experiment\n",
-    "\n",
-    "> Experiments hold the results of an experiment against a dataset."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp experiment"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jjmachan/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "# | export\n",
-    "import typing as t\n",
-    "\n",
-    "from fastcore.utils import patch\n",
-    "\n",
-    "from ragas_experimental.model.pydantic_model import ExtendedPydanticBaseModel as BaseModel\n",
-    "from ragas_experimental.backends.ragas_api_client import RagasApiClient\n",
-    "from ragas_experimental.dataset import Dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "class Experiment(Dataset):\n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        name: str,\n",
-    "        model: t.Type[BaseModel],\n",
-    "        project_id: str,\n",
-    "        experiment_id: str,\n",
-    "        ragas_api_client: t.Optional[RagasApiClient] = None,\n",
-    "        backend: t.Literal[\"ragas_app\", \"local\"] = \"ragas_app\",\n",
-    "        local_root_dir: t.Optional[str] = None,\n",
-    "    ):\n",
-    "        self.experiment_id = experiment_id\n",
-    "        super().__init__(\n",
-    "            name=name, \n",
-    "            model=model, \n",
-    "            project_id=project_id, \n",
-    "            dataset_id=experiment_id, \n",
-    "            ragas_api_client=ragas_api_client,\n",
-    "            backend=backend,\n",
-    "            local_root_dir=local_root_dir,\n",
-    "            datatable_type=\"experiments\",\n",
-    "        )\n",
-    "\n",
-    "    def __str__(self):\n",
-    "        return f\"Experiment(name={self.name}, model={self.model.__name__}, len={len(self._entries)})\"\n",
-    "\n",
-    "    __repr__ = __str__"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental import Project, BaseModel\n",
-    "from ragas_experimental.metric import MetricResult\n",
-    "from ragas_experimental.utils import get_test_directory\n",
-    "import typing as t"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "root_dir = get_test_directory()\n",
-    "\n",
-    "p = Project(\n",
-    "    project_id=\"test\",\n",
-    "    backend=\"local\",\n",
-    "    root_dir=root_dir,\n",
-    ")\n",
-    "\n",
-    "class TestDataRow(BaseModel):\n",
-    "    id: t.Optional[int]\n",
-    "    query: str\n",
-    "    persona: t.List[t.Literal[\"opt1\", \"opt2\", \"opt3\"]]\n",
-    "\n",
-    "dataset = p.create_dataset(\n",
-    "    name=\"test_dataset\",\n",
-    "    model=TestDataRow,\n",
-    ")\n",
-    "\n",
-    "for i in range(3):\n",
-    "    row = TestDataRow(id=i, query=f\"query_{i}\", persona=[\"opt1\"])\n",
-    "    dataset.append(row)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "i = 0\n",
-    "row = TestDataRow(id=i, query=f\"query_{i}\", persona=[\"opt1\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': {'value': 0, 'type': typing.Optional[int]},\n",
-       " 'query': {'value': 'query_0', 'type': str},\n",
-       " 'persona': {'value': ['opt1'],\n",
-       "  'type': typing.List[typing.Literal['opt1', 'opt2', 'opt3']]}}"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "fields_info = {\n",
-    "    field_name: {\n",
-    "        'value': getattr(row, field_name),\n",
-    "        'type': field_info.annotation\n",
-    "    }\n",
-    "    for field_name, field_info in row.model_fields.items()\n",
-    "}\n",
-    "\n",
-    "fields_info"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class ExperimentDataRow(TestDataRow):\n",
-    "    response: str \n",
-    "    metrics: t.List[MetricResult]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "for d in dataset:\n",
-    "    response = \"test\"\n",
-    "    score1 = MetricResult(result=1, reason=\"score 1\")\n",
-    "    score2 = MetricResult(result=0, reason=\"score 2\")\n",
-    "\n",
-    "    e = ExperimentDataRow(\n",
-    "        id=row.id,\n",
-    "        query=row.query,\n",
-    "        persona=[\"opt1\"],\n",
-    "        response=response,\n",
-    "        metrics=[score1, score2],\n",
-    "    )\n",
-    "    experiments.append(e)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['test_experiment.csv']"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import os\n",
-    "\n",
-    "os.listdir(\n",
-    "    os.path.join(root_dir, \"test\", \"experiments\")\n",
-    ")"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/index.ipynb b/experimental/old_nbs/api/index.ipynb
deleted file mode 100644
index 7b9c879a1..000000000
--- a/experimental/old_nbs/api/index.ipynb
+++ /dev/null
@@ -1,103 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Ragas Experimental\n",
-    "\n",
-    "> Experimental Ragas Evaluation UI and Library"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Usage"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Installation"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Install latest from the GitHub [repository][repo]:\n",
-    "\n",
-    "```sh\n",
-    "$ pip install git+https://github.com/explodinggradients/ragas_experimental.git\n",
-    "```\n",
-    "\n",
-    "or from [pypi][pypi]\n",
-    "\n",
-    "\n",
-    "```sh\n",
-    "$ pip install ragas_experimental\n",
-    "```\n",
-    "\n",
-    "\n",
-    "[repo]: https://github.com/explodinggradients/ragas_experimental\n",
-    "[docs]: https://explodinggradients.github.io/ragas_experimental/\n",
-    "[pypi]: https://pypi.org/project/ragas_experimental/"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Getting Started"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "First do signup to [beta.app.ragas.io](https://beta.app.ragas.io/) and generate the App Token and put it in the as the env variable `RAGAS_APP_TOKEN`. "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "# ideally you load this from a .env file so as to not commit it to the repo\n",
-    "os.environ[\"RAGAS_APP_TOKEN\"] = \"api-key\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Now lets init a `Project` in the App"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental import Project\n",
-    "\n",
-    "project = Project.create(\"my-project\")\n",
-    "project"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/experimental/old_nbs/api/init_module.ipynb b/experimental/old_nbs/api/init_module.ipynb
deleted file mode 100644
index 9503a0d84..000000000
--- a/experimental/old_nbs/api/init_module.ipynb
+++ /dev/null
@@ -1,85 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp __init__"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "# Get version from setuptools_scm-generated file\n",
-    "try:\n",
-    "    from ._version import version as __version__\n",
-    "except ImportError:\n",
-    "    # Fallback for installed package\n",
-    "    from importlib.metadata import version as pkg_version, PackageNotFoundError\n",
-    "    try:\n",
-    "        __version__ = pkg_version(\"ragas_experimental\")\n",
-    "    except PackageNotFoundError:\n",
-    "        __version__ = \"unknown\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "This exports to `__init__` for `RagasAnnotator` root so that we have more control over how we expose module for the users.\n",
-    "\n",
-    "We use setuptools_scm to get version information from Git tags."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "from ragas_experimental.project.core import Project\n",
-    "import ragas_experimental.model.notion_typing as nmt\n",
-    "from ragas_experimental.model.notion_model import NotionModel\n",
-    "from ragas_experimental.model.pydantic_model import ExtendedPydanticBaseModel as BaseModel\n",
-    "\n",
-    "# just import to run the module\n",
-    "import ragas_experimental.project.datasets\n",
-    "import ragas_experimental.project.experiments\n",
-    "import ragas_experimental.project.comparison"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "__all__ = [\"Project\", \"NotionModel\", \"nmt\", \"BaseModel\"]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/llm/llm.ipynb b/experimental/old_nbs/api/llm/llm.ipynb
deleted file mode 100644
index 148692c2a..000000000
--- a/experimental/old_nbs/api/llm/llm.ipynb
+++ /dev/null
@@ -1,257 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp llm.llm"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# LLMs"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import typing as t\n",
-    "import asyncio\n",
-    "import inspect\n",
-    "import threading\n",
-    "from pydantic import BaseModel\n",
-    "import instructor\n",
-    "\n",
-    "T = t.TypeVar('T', bound=BaseModel)\n",
-    "\n",
-    "class RagasLLM:\n",
-    "    def __init__(self, provider: str, model: str, client: t.Any, **model_args):\n",
-    "        self.provider = provider.lower()\n",
-    "        self.model = model\n",
-    "        self.model_args = model_args or {}\n",
-    "        self.client = self._initialize_client(provider, client)\n",
-    "        # Check if client is async-capable at initialization\n",
-    "        self.is_async = self._check_client_async()\n",
-    "    \n",
-    "    def _check_client_async(self) -> bool:\n",
-    "        \"\"\"Determine if the client is async-capable.\"\"\"\n",
-    "        try:\n",
-    "            # Check if this is an async client by checking for a coroutine method\n",
-    "            if hasattr(self.client.chat.completions, 'create'):\n",
-    "                return inspect.iscoroutinefunction(self.client.chat.completions.create)\n",
-    "            return False\n",
-    "        except (AttributeError, TypeError):\n",
-    "            return False\n",
-    "    \n",
-    "    def _initialize_client(self, provider: str, client: t.Any) -> t.Any:\n",
-    "        provider = provider.lower()\n",
-    "        \n",
-    "        if provider == \"openai\":\n",
-    "            return instructor.from_openai(client)\n",
-    "        elif provider == \"anthropic\":\n",
-    "            return instructor.from_anthropic(client)\n",
-    "        elif provider == \"cohere\":\n",
-    "            return instructor.from_cohere(client)\n",
-    "        elif provider == \"gemini\":\n",
-    "            return instructor.from_gemini(client)\n",
-    "        elif provider == \"litellm\":\n",
-    "            return instructor.from_litellm(client)\n",
-    "        else:\n",
-    "            raise ValueError(f\"Unsupported provider: {provider}\")\n",
-    "    \n",
-    "    def _run_async_in_current_loop(self, coro):\n",
-    "        \"\"\"Run an async coroutine in the current event loop if possible.\n",
-    "        \n",
-    "        This handles Jupyter environments correctly by using a separate thread\n",
-    "        when a running event loop is detected.\n",
-    "        \"\"\"\n",
-    "        try:\n",
-    "            # Try to get the current event loop\n",
-    "            loop = asyncio.get_event_loop()\n",
-    "            \n",
-    "            if loop.is_running():\n",
-    "                # If the loop is already running (like in Jupyter notebooks),\n",
-    "                # we run the coroutine in a separate thread with its own event loop\n",
-    "                result_container = {'result': None, 'exception': None}\n",
-    "                \n",
-    "                def run_in_thread():\n",
-    "                    # Create a new event loop for this thread\n",
-    "                    new_loop = asyncio.new_event_loop()\n",
-    "                    asyncio.set_event_loop(new_loop)\n",
-    "                    try:\n",
-    "                        # Run the coroutine in this thread's event loop\n",
-    "                        result_container['result'] = new_loop.run_until_complete(coro)\n",
-    "                    except Exception as e:\n",
-    "                        # Capture any exceptions to re-raise in the main thread\n",
-    "                        result_container['exception'] = e\n",
-    "                    finally:\n",
-    "                        # Clean up the event loop\n",
-    "                        new_loop.close()\n",
-    "                \n",
-    "                # Start the thread and wait for it to complete\n",
-    "                thread = threading.Thread(target=run_in_thread)\n",
-    "                thread.start()\n",
-    "                thread.join()\n",
-    "                \n",
-    "                # Re-raise any exceptions that occurred in the thread\n",
-    "                if result_container['exception']:\n",
-    "                    raise result_container['exception']\n",
-    "                    \n",
-    "                return result_container['result']\n",
-    "            else:\n",
-    "                # Standard case - event loop exists but isn't running\n",
-    "                return loop.run_until_complete(coro)\n",
-    "                \n",
-    "        except RuntimeError:\n",
-    "            # If we get a runtime error about no event loop, create a new one\n",
-    "            loop = asyncio.new_event_loop()\n",
-    "            asyncio.set_event_loop(loop)\n",
-    "            try:\n",
-    "                return loop.run_until_complete(coro)\n",
-    "            finally:\n",
-    "                # Clean up\n",
-    "                loop.close()\n",
-    "                asyncio.set_event_loop(None)\n",
-    "    \n",
-    "    def generate(self, prompt: str, response_model: t.Type[T]) -> T:\n",
-    "        \"\"\"Generate a response using the configured LLM.\n",
-    "        \n",
-    "        For async clients, this will run the async method in the appropriate event loop.\n",
-    "        \"\"\"\n",
-    "        messages = [{\"role\": \"user\", \"content\": prompt}]\n",
-    "        \n",
-    "        # If client is async, use the appropriate method to run it\n",
-    "        if self.is_async:\n",
-    "            return self._run_async_in_current_loop(\n",
-    "                self.agenerate(prompt, response_model)\n",
-    "            )\n",
-    "        else:\n",
-    "            # Regular sync client, just call the method directly\n",
-    "            return self.client.chat.completions.create(\n",
-    "                model=self.model,\n",
-    "                messages=messages,\n",
-    "                response_model=response_model,\n",
-    "                **self.model_args,\n",
-    "            )\n",
-    "    \n",
-    "    async def agenerate(self, prompt: str, response_model: t.Type[T]) -> T:\n",
-    "        \"\"\"Asynchronously generate a response using the configured LLM.\"\"\"\n",
-    "        messages = [{\"role\": \"user\", \"content\": prompt}]\n",
-    "        \n",
-    "        # If client is not async, raise a helpful error\n",
-    "        if not self.is_async:\n",
-    "            raise TypeError(\n",
-    "                \"Cannot use agenerate() with a synchronous client. Use generate() instead.\"\n",
-    "            )\n",
-    "        \n",
-    "        # Regular async client, call the method directly\n",
-    "        return await self.client.chat.completions.create(\n",
-    "            model=self.model,\n",
-    "            messages=messages,\n",
-    "            response_model=response_model,\n",
-    "            **self.model_args,\n",
-    "        )\n",
-    "\n",
-    "def ragas_llm(provider: str, model: str, client: t.Any, **model_args) -> RagasLLM:\n",
-    "    return RagasLLM(provider=provider, client=client, model=model, **model_args)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Example Usage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| eval: false\n",
-    "\n",
-    "from openai import OpenAI\n",
-    "class Response(BaseModel):\n",
-    "    response: str\n",
-    "\n",
-    "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n",
-    "llm.generate(\"What is the capital of India?\",response_model=Response) #works fine\n",
-    "\n",
-    "try:\n",
-    "    await llm.agenerate(\"What is the capital of India?\", response_model=Response)\n",
-    "except TypeError as e:\n",
-    "    assert isinstance(e, TypeError)\n",
-    "#gives TypeError: object Response can't be used in 'await' expression\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Response(response='The capital of India is New Delhi.')"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#| eval: false\n",
-    "\n",
-    "from openai import AsyncOpenAI\n",
-    "\n",
-    "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=AsyncOpenAI())\n",
-    "await llm.agenerate(\"What is the capital of India?\",response_model=Response)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Response(response='The capital of India is New Delhi.')"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#| eval: false\n",
-    "\n",
-    "from anthropic import Anthropic\n",
-    "\n",
-    "llm = ragas_llm(provider=\"anthropic\",model=\"claude-3-opus-20240229\",client=Anthropic(),max_tokens=1024)\n",
-    "llm.generate(\"What is the capital of India?\",response_model=Response)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/metric/base.ipynb b/experimental/old_nbs/api/metric/base.ipynb
deleted file mode 100644
index 6e59eb183..000000000
--- a/experimental/old_nbs/api/metric/base.ipynb
+++ /dev/null
@@ -1,231 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "00ef8db1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp metric.base"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "2eb8f806",
-   "metadata": {},
-   "source": [
-    "# BaseMetric\n",
-    "> base class for all type of metrics in ragas"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e8ccff58",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "from abc import ABC, abstractmethod\n",
-    "import asyncio\n",
-    "from dataclasses import dataclass, field\n",
-    "from pydantic import BaseModel\n",
-    "import typing as t\n",
-    "from tqdm import tqdm\n",
-    "import string\n",
-    "\n",
-    "\n",
-    "from ragas_experimental.prompt.base import Prompt\n",
-    "from ragas_experimental.embedding.base import BaseEmbedding\n",
-    "from ragas_experimental.metric import MetricResult\n",
-    "from ragas_experimental.llm import RagasLLM\n",
-    "from ragas_experimental.model.notion_model import NotionModel\n",
-    "from ragas_experimental.prompt.dynamic_few_shot import DynamicFewShotPrompt\n",
-    "\n",
-    "if t.TYPE_CHECKING:\n",
-    "    from ragas_experimental.project.core import Project"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@dataclass\n",
-    "class Metric(ABC):\n",
-    "    \"\"\"Base class for all metrics in the LLM evaluation library.\"\"\"\n",
-    "    name: str\n",
-    "    prompt: str | Prompt\n",
-    "    llm: RagasLLM\n",
-    "    _response_models: t.Dict[bool, t.Type[BaseModel]] = field(\n",
-    "        default_factory=dict, init=False, repr=False\n",
-    "    )\n",
-    "    \n",
-    "    def __post_init__(self):\n",
-    "        if isinstance(self.prompt,str):\n",
-    "            self.prompt = Prompt(self.prompt)\n",
-    "    \n",
-    "    @abstractmethod\n",
-    "    def _get_response_model(self, with_reasoning: bool) -> t.Type[BaseModel]:\n",
-    "        \"\"\"Get the appropriate response model.\"\"\"\n",
-    "        pass\n",
-    "\n",
-    "    @abstractmethod\n",
-    "    def _ensemble(self, results: t.List[MetricResult]) -> MetricResult:\n",
-    "        pass\n",
-    "    \n",
-    "    def get_variables(self) -> t.List[str]:\n",
-    "        if isinstance(self.prompt, Prompt):\n",
-    "            fstr = self.prompt.instruction\n",
-    "        else:\n",
-    "            fstr = self.prompt\n",
-    "        vars = [field_name for _, field_name, _, _ in string.Formatter().parse(fstr) if field_name]\n",
-    "        return vars\n",
-    "    \n",
-    "    def score(self, reasoning: bool = True, n: int = 1, **kwargs) -> t.Any:\n",
-    "        responses = []\n",
-    "        traces = {}\n",
-    "        traces[\"input\"] = kwargs\n",
-    "        prompt_input = self.prompt.format(**kwargs)\n",
-    "        for _ in range(n):\n",
-    "            response = self.llm.generate(prompt_input, response_model = self._get_response_model(reasoning)) \n",
-    "            traces['output'] = response.model_dump()\n",
-    "            response = MetricResult(**response.model_dump())\n",
-    "            responses.append(response)\n",
-    "        results = self._ensemble(responses)\n",
-    "        results.traces = traces\n",
-    "        return results\n",
-    "\n",
-    "\n",
-    "    async def ascore(self, reasoning: bool = True, n: int = 1, **kwargs) -> MetricResult:\n",
-    "        responses = []  # Added missing initialization\n",
-    "        traces = {}\n",
-    "        traces[\"input\"] = kwargs\n",
-    "        prompt_input = self.prompt.format(**kwargs)\n",
-    "        for _ in range(n):\n",
-    "            response = await self.llm.agenerate(prompt_input, response_model = self._get_response_model(reasoning))\n",
-    "            traces['output'] = response.model_dump()\n",
-    "            response = MetricResult(**response.model_dump())  # Fixed missing parentheses\n",
-    "            responses.append(response)\n",
-    "        results = self._ensemble(responses)\n",
-    "        results.traces = traces\n",
-    "        return results\n",
-    "        \n",
-    "    def batch_score(self, inputs: t.List[t.Dict[str, t.Any]], reasoning: bool = True, n: int = 1) -> t.List[t.Any]:\n",
-    "        return [self.score(reasoning, n, **input_dict) for input_dict in inputs]\n",
-    "    \n",
-    "    async def abatch_score(self, inputs: t.List[t.Dict[str, t.Any]], reasoning: bool = True, n: int = 1) -> t.List[MetricResult]:\n",
-    "        async_tasks = []\n",
-    "        for input_dict in inputs:\n",
-    "            # Add reasoning and n to the input parameters\n",
-    "            async_tasks.append(self.ascore(reasoning=reasoning, n=n, **input_dict))\n",
-    "            \n",
-    "        # Run all tasks concurrently and return results\n",
-    "        return await asyncio.gather(*async_tasks)\n",
-    "    \n",
-    "    def train(self,project: \"Project\", experiment_names: t.List[str], model:NotionModel, embedding_model: BaseEmbedding,method: t.Dict[str, t.Any]):\n",
-    "        \n",
-    "        assert isinstance(self.prompt, Prompt)\n",
-    "        self.prompt = DynamicFewShotPrompt.from_prompt(self.prompt,embedding_model)\n",
-    "        datasets = []\n",
-    "        for experiment_name in experiment_names:\n",
-    "            experiment_data = project.get_experiment(experiment_name,model)\n",
-    "            experiment_data.load()\n",
-    "            datasets.append(experiment_data)\n",
-    "        \n",
-    "        total_items = sum([len(dataset) for dataset in datasets])\n",
-    "        input_vars = self.get_variables()\n",
-    "        output_vars = [self.name, f'{self.name}_reason']\n",
-    "        with tqdm(total=total_items, desc=\"Processing examples\") as pbar:\n",
-    "            for dataset in datasets:\n",
-    "                for row in dataset:\n",
-    "                    inputs = {var: getattr(row, var) for var in input_vars if hasattr(row, var)}\n",
-    "                    output = {var: getattr(row, var) for var in output_vars if hasattr(row, var)}\n",
-    "                    if output:\n",
-    "                            self.prompt.add_example(inputs,output)\n",
-    "                    pbar.update(1)\n",
-    "        \n",
-    "                \n",
-    "                \n",
-    "        \n",
-    "        \n",
-    "        \n",
-    "                "
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "fc4b7458",
-   "metadata": {},
-   "source": [
-    "### Example\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "fcf208fa",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "#| eval: false\n",
-    "\n",
-    "from ragas_experimental.llm import ragas_llm\n",
-    "from openai import OpenAI\n",
-    "\n",
-    "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n",
-    "\n",
-    "@dataclass\n",
-    "class CustomMetric(Metric):\n",
-    "    values: t.List[str] = field(default_factory=lambda: [\"pass\", \"fail\"])\n",
-    "    \n",
-    "    def _get_response_model(self, with_reasoning: bool) -> t.Type[BaseModel]:\n",
-    "        \"\"\"Get or create a response model based on reasoning parameter.\"\"\"\n",
-    "        \n",
-    "        class mymodel(BaseModel):\n",
-    "            result: int\n",
-    "            reason: t.Optional[str] = None\n",
-    "            \n",
-    "        return mymodel \n",
-    "\n",
-    "    def _ensemble(self,results:t.List[MetricResult]) -> MetricResult:\n",
-    "        \n",
-    "        return results[0]  # Placeholder for ensemble logic\n",
-    "\n",
-    "my_metric = CustomMetric(name=\"example\", prompt=\"What is the result of {input}?\", llm=llm)\n",
-    "my_metric.score(input=\"test\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/metric/decorator.ipynb b/experimental/old_nbs/api/metric/decorator.ipynb
deleted file mode 100644
index fe1549448..000000000
--- a/experimental/old_nbs/api/metric/decorator.ipynb
+++ /dev/null
@@ -1,213 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp metric.decorator"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Decorator Factory\n",
-    "> decorator factory for creating custom metrics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import typing as t\n",
-    "import inspect\n",
-    "import asyncio\n",
-    "from dataclasses import dataclass\n",
-    "from ragas_experimental.metric import MetricResult\n",
-    "from ragas_experimental.llm import RagasLLM\n",
-    "from ragas_experimental.prompt.base import Prompt\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "def create_metric_decorator(metric_class):\n",
-    "    \"\"\"\n",
-    "    Factory function that creates decorator factories for different metric types.\n",
-    "    \n",
-    "    Args:\n",
-    "        metric_class: The metric class to use (DiscreteMetrics, NumericMetrics, etc.)\n",
-    "        \n",
-    "    Returns:\n",
-    "        A decorator factory function for the specified metric type\n",
-    "    \"\"\"\n",
-    "    def decorator_factory(llm:RagasLLM, prompt: t.Union[str, Prompt], name: t.Optional[str] = None, **metric_params):\n",
-    "        \"\"\"\n",
-    "        Creates a decorator that wraps a function into a metric instance.\n",
-    "        \n",
-    "        Args:\n",
-    "            llm: The language model instance to use\n",
-    "            prompt: The prompt template\n",
-    "            name: Optional name for the metric (defaults to function name)\n",
-    "            **metric_params: Additional parameters specific to the metric type\n",
-    "                (values for DiscreteMetrics, range for NumericMetrics, etc.)\n",
-    "        \n",
-    "        Returns:\n",
-    "            A decorator function\n",
-    "        \"\"\"\n",
-    "        def decorator(func):\n",
-    "            # Get metric name and check if function is async\n",
-    "            metric_name = name or func.__name__\n",
-    "            is_async = inspect.iscoroutinefunction(func)\n",
-    "            \n",
-    "            #TODO: Move to dataclass type implementation\n",
-    "            @dataclass\n",
-    "            class CustomMetric(metric_class):\n",
-    "                \n",
-    "                def _run_sync_in_async(self, func, *args, **kwargs):\n",
-    "                    \"\"\"Run a synchronous function in an async context.\"\"\"\n",
-    "                    # For sync functions, just run them normally\n",
-    "                    return func(*args, **kwargs)\n",
-    "                \n",
-    "                def _execute_metric(self, is_async_execution, reasoning, **kwargs):\n",
-    "                    \"\"\"Execute the metric function with proper async handling.\"\"\"\n",
-    "                    try:\n",
-    "                        if is_async:\n",
-    "                            # Async function implementation\n",
-    "                            if is_async_execution:\n",
-    "                                # In async context, await the function directly\n",
-    "                                result = func(self.llm, self.prompt, **kwargs)\n",
-    "                            else:\n",
-    "                                # In sync context, run the async function in an event loop\n",
-    "                                try:\n",
-    "                                    loop = asyncio.get_event_loop()\n",
-    "                                except RuntimeError:\n",
-    "                                    loop = asyncio.new_event_loop()\n",
-    "                                    asyncio.set_event_loop(loop)\n",
-    "                                result = loop.run_until_complete(func(self.llm, self.prompt, **kwargs))\n",
-    "                        else:\n",
-    "                            # Sync function implementation\n",
-    "                            result = func(self.llm, self.prompt, **kwargs)\n",
-    "                        \n",
-    "                        return result\n",
-    "                    except Exception as e:\n",
-    "                        # Handle errors gracefully\n",
-    "                        error_msg = f\"Error executing metric {self.name}: {str(e)}\"\n",
-    "                        return MetricResult(result=None, reason=error_msg)\n",
-    "                \n",
-    "                def score(self, reasoning: bool = True, n: int = 1, **kwargs):\n",
-    "                    \"\"\"Synchronous scoring method.\"\"\"\n",
-    "                    return self._execute_metric(is_async_execution=False, reasoning=reasoning, **kwargs)\n",
-    "                \n",
-    "                async def ascore(self, reasoning: bool = True, n: int = 1, **kwargs):\n",
-    "                    \"\"\"Asynchronous scoring method.\"\"\"\n",
-    "                    if is_async:\n",
-    "                        # For async functions, await the result\n",
-    "                        result = await func(self.llm, self.prompt, **kwargs)\n",
-    "                        return self._extract_result(result, reasoning)\n",
-    "                    else:\n",
-    "                        # For sync functions, run normally\n",
-    "                        result = self._run_sync_in_async(func, self.llm, self.prompt, **kwargs)\n",
-    "                        return result\n",
-    "            \n",
-    "            # Create the metric instance with all parameters\n",
-    "            metric_instance = CustomMetric(\n",
-    "                name=metric_name,\n",
-    "                prompt=prompt,\n",
-    "                llm=llm,\n",
-    "                **metric_params\n",
-    "            )\n",
-    "            \n",
-    "            # Preserve metadata\n",
-    "            metric_instance.__name__ = metric_name\n",
-    "            metric_instance.__doc__ = func.__doc__\n",
-    "            \n",
-    "            return metric_instance\n",
-    "        \n",
-    "        return decorator\n",
-    "    \n",
-    "    return decorator_factory\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Example usage\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "low\n",
-      "The context or details of the user's response ('my response') are not provided, making it impossible to evaluate its helpfulness accurately.\n"
-     ]
-    }
-   ],
-   "source": [
-    "#| eval: false\n",
-    "\n",
-    "\n",
-    "from ragas_experimental.metric import DiscreteMetric, MetricResult\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "from ragas_experimental.llm import ragas_llm\n",
-    "from openai import OpenAI\n",
-    "\n",
-    "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n",
-    "\n",
-    "discrete_metric = create_metric_decorator(DiscreteMetric)\n",
-    "\n",
-    "@discrete_metric(llm=llm,\n",
-    "    prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n",
-    "    name='new_metric',values=[\"low\",\"med\",\"high\"])\n",
-    "def my_metric(llm,prompt,**kwargs):\n",
-    "\n",
-    "        class response_model(BaseModel):\n",
-    "             output: t.List[bool]\n",
-    "             reason: str\n",
-    "        \n",
-    "        response = llm.generate(prompt.format(**kwargs),response_model=response_model)\n",
-    "        total = sum(response.output)\n",
-    "        if total < 1:\n",
-    "            score = 'low'\n",
-    "        else:\n",
-    "            score = 'high'\n",
-    "        return MetricResult(result=score, reason=response.reason)\n",
-    "\n",
-    "result = my_metric.score(response='my response') # result\n",
-    "print(result)\n",
-    "print(result.reason)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/metric/discrete.ipynb b/experimental/old_nbs/api/metric/discrete.ipynb
deleted file mode 100644
index 6d75f6219..000000000
--- a/experimental/old_nbs/api/metric/discrete.ipynb
+++ /dev/null
@@ -1,191 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp metric.discrete"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# DiscreteMetric\n",
-    "> Base class from which all discrete metrics should inherit."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import typing as t\n",
-    "from dataclasses import dataclass, field\n",
-    "from pydantic import BaseModel, create_model\n",
-    "from collections import Counter\n",
-    "from ragas_experimental.metric import Metric, MetricResult\n",
-    "from ragas_experimental.metric.decorator import create_metric_decorator\n",
-    "\n",
-    "\n",
-    "@dataclass\n",
-    "class DiscreteMetric(Metric):\n",
-    "    values: t.List[str] = field(default_factory=lambda: [\"pass\", \"fail\"])\n",
-    "    \n",
-    "    def _get_response_model(self, with_reasoning: bool) -> t.Type[BaseModel]:\n",
-    "        \"\"\"Get or create a response model based on reasoning parameter.\"\"\"\n",
-    "        \n",
-    "        if with_reasoning in self._response_models:\n",
-    "            return self._response_models[with_reasoning]\n",
-    "        \n",
-    "        model_name = 'response_model'\n",
-    "        values = tuple(self.values)\n",
-    "        fields = {\"result\": (t.Literal[values], ...)}\n",
-    "        \n",
-    "        if with_reasoning:\n",
-    "            fields[\"reason\"] = (str, ...) # type: ignore\n",
-    "        \n",
-    "        model = create_model(model_name, **fields)  # type: ignore\n",
-    "        self._response_models[with_reasoning] = model\n",
-    "        return model \n",
-    "\n",
-    "    def _ensemble(self,results:t.List[MetricResult]) -> MetricResult:\n",
-    "\n",
-    "\n",
-    "        if len(results)==1:\n",
-    "            return results[0]\n",
-    "            \n",
-    "        candidates = [candidate.result for candidate in results]\n",
-    "        counter = Counter(candidates)\n",
-    "        max_count = max(counter.values())\n",
-    "        for candidate in results:\n",
-    "            if counter[candidate.result] == max_count:\n",
-    "                result = candidate.result              \n",
-    "                reason = candidate.reason\n",
-    "                return MetricResult(result=result, reason=reason)\n",
-    "        \n",
-    "        return results[0]\n",
-    "\n",
-    "\n",
-    "discrete_metric = create_metric_decorator(DiscreteMetric)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Example usage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "low\n",
-      "The response is incomplete and lacks any specific information. It cannot be evaluated for helpfulness without further context or content.\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "#| eval: false\n",
-    "\n",
-    "from ragas_experimental.llm import ragas_llm\n",
-    "from openai import OpenAI\n",
-    "\n",
-    "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n",
-    "\n",
-    "\n",
-    "my_metric = DiscreteMetric(\n",
-    "    llm=llm,\n",
-    "    name='helpfulness',\n",
-    "    prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n",
-    "    values=[\"low\",\"med\",\"high\"],\n",
-    ")\n",
-    "\n",
-    "result = my_metric.score(response=\"this is my response\")\n",
-    "print(result) #gives \"low\"\n",
-    "print(result.reason) #gives reasoning from llm\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Write custom discrete metric"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "low\n",
-      "The prompt 'my response' does not provide sufficient information or context for me to evaluate its helpfulness. An answer needs to be specific and provide insight or information relative to a clear question or context.\n"
-     ]
-    }
-   ],
-   "source": [
-    "#| eval: false\n",
-    "from ragas_experimental.metric.result import MetricResult\n",
-    "\n",
-    "@discrete_metric(\n",
-    "    llm=llm,\n",
-    "    prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n",
-    "    name='new_metric',\n",
-    "    values=[\"low\",\"med\",\"high\"]\n",
-    ")\n",
-    "def my_metric(llm,prompt,**kwargs):\n",
-    "\n",
-    "        class response_model(BaseModel):\n",
-    "             output: t.List[bool]\n",
-    "             reason: str\n",
-    "        traces = {}\n",
-    "        traces['input'] = kwargs\n",
-    "        response = llm.generate(prompt.format(**kwargs),response_model=response_model)\n",
-    "        traces['output'] = response.model_dump()\n",
-    "        total = sum(response.output)\n",
-    "        if total < 1:\n",
-    "            score = 'low'\n",
-    "        else:\n",
-    "            score = 'high'\n",
-    "            \n",
-    "        return MetricResult(result=score,reason=response.reason,traces=traces)\n",
-    "\n",
-    "result = my_metric.score(response='my response') # result\n",
-    "print(result)\n",
-    "print(result.reason)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/metric/numeric.ipynb b/experimental/old_nbs/api/metric/numeric.ipynb
deleted file mode 100644
index 89b3889ae..000000000
--- a/experimental/old_nbs/api/metric/numeric.ipynb
+++ /dev/null
@@ -1,199 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Numeric Metric\n",
-    "> Base class for all numeric metrics\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp metric.numeric"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/opt/homebrew/Caskroom/miniforge/base/envs/random/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import typing as t\n",
-    "from dataclasses import dataclass, field\n",
-    "from pydantic import BaseModel, create_model\n",
-    "from ragas_experimental.metric import Metric, MetricResult\n",
-    "from ragas_experimental.metric.decorator import create_metric_decorator\n",
-    "\n",
-    "@dataclass\n",
-    "class NumericMetric(Metric):\n",
-    "    range: t.Tuple[float,float]\n",
-    "    \n",
-    "    def _get_response_model(self, with_reasoning: bool) -> t.Type[BaseModel]:\n",
-    "        \"\"\"Get or create a response model based on reasoning parameter.\"\"\"\n",
-    "        \n",
-    "        if with_reasoning in self._response_models:\n",
-    "            return self._response_models[with_reasoning]\n",
-    "        \n",
-    "        model_name = 'response_model'\n",
-    "        fields = {\"result\": (float,...)}\n",
-    "        \n",
-    "        if with_reasoning:\n",
-    "            fields[\"reason\"] = (str, ...) #type: ignore\n",
-    "        \n",
-    "        model = create_model(model_name, **fields)\n",
-    "        self._response_models[with_reasoning] = model\n",
-    "        return model \n",
-    "\n",
-    "    def _ensemble(self,results:t.List[MetricResult]) -> MetricResult:\n",
-    "\n",
-    "        if len(results)==1:\n",
-    "            return results[0]\n",
-    "    \n",
-    "        candidates = [candidate.result for candidate in results]\n",
-    "        result = sum(candidates)/len(candidates)\n",
-    "        reason = results[0].reason\n",
-    "    \n",
-    "        return MetricResult(result=result,reason=reason)\n",
-    "    \n",
-    "    \n",
-    "numeric_metric = create_metric_decorator(NumericMetric)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Example usage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "\"The provided input lacks context or content to determine if it is helpful as it merely states 'this is my response' without any additional information.\""
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\n",
-    "#| eval: false\n",
-    "\n",
-    "from ragas_experimental.llm import ragas_llm\n",
-    "from openai import OpenAI\n",
-    "\n",
-    "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n",
-    "\n",
-    "\n",
-    "my_metric = NumericMetric(\n",
-    "    name='helpfulness',\n",
-    "    llm=llm,\n",
-    "    prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n",
-    "    range=(0,10),\n",
-    ")\n",
-    "\n",
-    "result = my_metric.score(response=\"this is my response\")\n",
-    "result #gives \"low\"\n",
-    "result.reason #gives reasoning from llm\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Write custom numeric metric"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "10"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "\n",
-    "#| eval: false\n",
-    "from ragas_experimental.metric import MetricResult\n",
-    "\n",
-    "@numeric_metric(llm=llm,\n",
-    "    prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n",
-    "    name='new_metric',range=(0,10))\n",
-    "def my_metric(llm,prompt,**kwargs):\n",
-    "\n",
-    "        class response_model(BaseModel):\n",
-    "             output: int\n",
-    "             reason: str\n",
-    "        \n",
-    "        traces = {}\n",
-    "        traces['input'] = kwargs\n",
-    "        response = llm.generate(prompt.format(**kwargs),response_model=response_model)\n",
-    "        traces['output'] = response.dict()\n",
-    "        total = response.output\n",
-    "        if total < 1:\n",
-    "            score = 0\n",
-    "        else:\n",
-    "            score = 10\n",
-    "        return MetricResult(result=score,reason=response.reason,traces=traces)\n",
-    "\n",
-    "result = my_metric.score(response='my response') # result\n",
-    "result # 10\n",
-    "result.reason # the reason for the answer\n",
-    "\n",
-    "result1 = my_metric.score(response='my response 1') # result\n",
-    "result2 = my_metric.score(response='my response 2') # result\n",
-    "\n",
-    "result1 + result2 # should be addable and behave like a float\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/metric/ranking.ipynb b/experimental/old_nbs/api/metric/ranking.ipynb
deleted file mode 100644
index a14072597..000000000
--- a/experimental/old_nbs/api/metric/ranking.ipynb
+++ /dev/null
@@ -1,230 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# RankingMetric\n",
-    "> Base class for ranking metrics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp metric.ranking"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/opt/homebrew/Caskroom/miniforge/base/envs/random/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import typing as t\n",
-    "from dataclasses import dataclass\n",
-    "from pydantic import BaseModel, Field\n",
-    "from ragas_experimental.metric import Metric, MetricResult\n",
-    "from ragas_experimental.metric.decorator import create_metric_decorator\n",
-    "\n",
-    "@dataclass\n",
-    "class RankingMetric(Metric):\n",
-    "    num_ranks: int\n",
-    "    \n",
-    "    def _get_response_model(self, with_reasoning: bool) -> t.Type[BaseModel]:\n",
-    "        \"\"\"Get or create a response model based on reasoning parameter.\"\"\"\n",
-    "        \n",
-    "        if with_reasoning in self._response_models:\n",
-    "            return self._response_models[with_reasoning]\n",
-    "        \n",
-    "        # Store values needed for validation\n",
-    "        num_ranks = self.num_ranks\n",
-    "        \n",
-    "        # Create explicit model classes instead of using create_model\n",
-    "        if with_reasoning:\n",
-    "            # Model with result and reason\n",
-    "            class ResponseModelWithReason(BaseModel):\n",
-    "                result: t.List[int] = Field(...)\n",
-    "                reason: str = Field(...)\n",
-    "                \n",
-    "                def model_post_init(self, __context):\n",
-    "                    expected = set(range(num_ranks))\n",
-    "                    if set(self.result) != expected:\n",
-    "                        raise ValueError(\n",
-    "                            f\"'result' must contain exactly the numbers {sorted(expected)} without repetition.\"\n",
-    "                        )\n",
-    "            \n",
-    "            self._response_models[with_reasoning] = ResponseModelWithReason\n",
-    "            return ResponseModelWithReason\n",
-    "        else:\n",
-    "            # Model with just result\n",
-    "            class ResponseModel(BaseModel):\n",
-    "                result: t.List[int] = Field(...)\n",
-    "                \n",
-    "                def model_post_init(self, __context):\n",
-    "                    expected = set(range(num_ranks))\n",
-    "                    if set(self.result) != expected:\n",
-    "                        raise ValueError(\n",
-    "                            f\"'result' must contain exactly the numbers {sorted(expected)} without repetition.\"\n",
-    "                        )\n",
-    "            \n",
-    "            self._response_models[with_reasoning] = ResponseModel\n",
-    "            return ResponseModel\n",
-    "\n",
-    "    def _ensemble(self, results: t.List[MetricResult]) -> MetricResult:\n",
-    "        if len(results) == 1:\n",
-    "            return results[0]\n",
-    "\n",
-    "        n_items = self.num_ranks  # Use the class attribute instead of len(results)\n",
-    "        borda_scores = [0] * n_items\n",
-    "\n",
-    "        for result in results:\n",
-    "            for position_idx, item_idx in enumerate(result.result):\n",
-    "                borda_scores[item_idx] += (n_items - position_idx)  # Fixed the formula\n",
-    "\n",
-    "        indexed_scores = [(score, i) for i, score in enumerate(borda_scores)]    \n",
-    "        indexed_scores.sort(key=lambda x: (-x[0], x[1]))    \n",
-    "        final_ranking = [pos for _, pos in indexed_scores]\n",
-    "\n",
-    "        if any(r.reason for r in results):\n",
-    "            reason = \"Ensemble ranking based on multiple evaluations.\\n\" + '\\n'.join([r.reason for r in results if r.reason])\n",
-    "        else:\n",
-    "            reason = None\n",
-    "        \n",
-    "        return MetricResult(result=final_ranking, reason=reason)\n",
-    "    \n",
-    "\n",
-    "ranking_metric = create_metric_decorator(RankingMetric)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Example usage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[2, 1, 0]\n",
-      "Ensemble ranking based on multiple evaluations.\n",
-      "The ranking is based on the length and detail of each response. 'the longest and most detailed answer.' is the most comprehensive, followed by 'a bit more detailed.', and 'short answer.' is the briefest.\n",
-      "The ranking is based on the length and detail of each response. The response 'the longest and most detailed answer.' is ranked highest (2) because it is the most detailed, followed by 'a bit more detailed.' (1), and finally 'short answer.' (0) as it is the least detailed.\n",
-      "The responses are ranked based on the level of detail and length. 'short answer.' is the least detailed, 'a bit more detailed.' provides more information, and 'the longest and most detailed answer.' offers the most comprehensive explanation.\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "#| eval: false\n",
-    "\n",
-    "from ragas_experimental.llm import ragas_llm\n",
-    "from openai import OpenAI\n",
-    "\n",
-    "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n",
-    "\n",
-    "my_ranking_metric = RankingMetric(\n",
-    "    name='response_ranking',\n",
-    "    llm=llm,  # Your language model instance\n",
-    "    prompt=\"Rank the following responses:\\n{candidates}\",\n",
-    "    num_ranks=3,\n",
-    ")\n",
-    "\n",
-    "# To score a single input (ranking candidate responses)\n",
-    "result = my_ranking_metric.score(candidates=[\n",
-    "    \"short answer.\",\n",
-    "    \"a bit more detailed.\",\n",
-    "    \"the longest and most detailed answer.\"\n",
-    "],n=3)\n",
-    "print(result)   # Might output something like: [1, 0, 2]\n",
-    "print(result.reason)  # Provides the reasoning behind the ranking\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Custom ranking metric"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[1, 0, 2]\n",
-      "Ranked based on response clarity and detail.\n"
-     ]
-    }
-   ],
-   "source": [
-    "#| eval: false\n",
-    "\n",
-    "from ragas_experimental.metric import MetricResult\n",
-    "\n",
-    "@ranking_metric(\n",
-    "    llm=llm,  # Your language model instance\n",
-    "    prompt=\"Rank the following responses:\\n{candidates}\",\n",
-    "    name='new_ranking_metric',\n",
-    "    num_ranks=3\n",
-    ")\n",
-    "def my_ranking_metric(llm, prompt, **kwargs):\n",
-    "    # Your custom logic that calls the LLM and returns a tuple of (ranking, reason)\n",
-    "    # For example, process the prompt (formatted with candidates) and produce a ranking.\n",
-    "    ranking = [1, 0, 2]  # Dummy ranking: second candidate is best, then first, then third.\n",
-    "    reason = \"Ranked based on response clarity and detail.\"\n",
-    "    return MetricResult(result=ranking, reason=reason)\n",
-    "\n",
-    "# Using the decorator-based ranking metric:\n",
-    "result = my_ranking_metric.score(candidates=[\n",
-    "    \"Response A: short answer.\",\n",
-    "    \"Response B: a bit more detailed.\",\n",
-    "    \"Response C: the longest and most detailed answer.\"\n",
-    "])\n",
-    "print(result)   # E.g., [1, 0, 2]\n",
-    "print(result.reason)  # E.g., \"Ranked based on response clarity and detail.\"\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/metric/result.ipynb b/experimental/old_nbs/api/metric/result.ipynb
deleted file mode 100644
index 68ccd6be8..000000000
--- a/experimental/old_nbs/api/metric/result.ipynb
+++ /dev/null
@@ -1,432 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "215f57b4",
-   "metadata": {},
-   "source": [
-    "# MetricResult\n",
-    "> MetricResult object to store the result of a metric"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "164726f3",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp metric.result"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "dcc3080c",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import typing as t\n",
-    "\n",
-    "from fastcore.utils import patch"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0f1c801a-6568-4ba4-8bbe-30bf154174fe",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class MetricResult:\n",
-    "    \"\"\"Class to hold the result of a metric evaluation.\n",
-    "    \n",
-    "    This class behaves like its underlying result value but still provides access\n",
-    "    to additional metadata like reasoning.\n",
-    "    \n",
-    "    Works with:\n",
-    "    - DiscreteMetrics (string results)\n",
-    "    - NumericMetrics (float/int results)\n",
-    "    - RankingMetrics (list results)\n",
-    "    \"\"\"\n",
-    "    \n",
-    "    def __init__(self, result: t.Any, reason: t.Optional[str] = None, traces: t.Optional[t.Dict[str, t.Any]] = None):\n",
-    "        if traces is not None:\n",
-    "            invalid_keys = [key for key in traces.keys() if key not in {\"input\", \"output\"}]\n",
-    "            if invalid_keys:\n",
-    "                raise ValueError(f\"Invalid keys in traces: {invalid_keys}. Allowed keys are 'input' and 'output'.\")\n",
-    "        self._result = result\n",
-    "        self.reason = reason\n",
-    "        self.traces = traces\n",
-    "    \n",
-    "    def __repr__(self):\n",
-    "        return repr(self._result)\n",
-    "    \n",
-    "    # Access to underlying result\n",
-    "    @property\n",
-    "    def result(self):\n",
-    "        \"\"\"Get the raw result value.\"\"\"\n",
-    "        return self._result\n",
-    "    \n",
-    "    \n",
-    "    # String conversion - works for all types\n",
-    "    def __str__(self):\n",
-    "        return str(self._result)\n",
-    "    \n",
-    "    # Container-like behaviors for list results (RankingMetric)\n",
-    "    def __getitem__(self, key):\n",
-    "        if not hasattr(self._result, \"__getitem__\"):\n",
-    "            raise TypeError(f\"{type(self._result).__name__} object is not subscriptable\")\n",
-    "        return self._result[key]\n",
-    "    \n",
-    "    def __iter__(self):\n",
-    "        if not hasattr(self._result, \"__iter__\"):\n",
-    "            raise TypeError(f\"{type(self._result).__name__} object is not iterable\")\n",
-    "        return iter(self._result)\n",
-    "    \n",
-    "    def __len__(self):\n",
-    "        if not hasattr(self._result, \"__len__\"):\n",
-    "            raise TypeError(f\"{type(self._result).__name__} has no len()\")\n",
-    "        return len(self._result)\n",
-    "    \n",
-    "    # Numeric operations for numeric results (NumericMetric)\n",
-    "    def __float__(self):\n",
-    "        if isinstance(self._result, (int, float)):\n",
-    "            return float(self._result)\n",
-    "        raise TypeError(f\"Cannot convert {type(self._result).__name__} to float\")\n",
-    "    \n",
-    "    def __int__(self):\n",
-    "        if isinstance(self._result, (int, float)):\n",
-    "            return int(self._result)\n",
-    "        raise TypeError(f\"Cannot convert {type(self._result).__name__} to int\")\n",
-    "    \n",
-    "    def __add__(self, other):\n",
-    "        if not isinstance(self._result, (int, float)):\n",
-    "            raise TypeError(f\"Cannot add {type(self._result).__name__} objects\")\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result + other._result\n",
-    "        return self._result + other\n",
-    "    \n",
-    "    def __radd__(self, other):\n",
-    "        if not isinstance(self._result, (int, float)):\n",
-    "            raise TypeError(f\"Cannot add {type(self._result).__name__} objects\")\n",
-    "        return other + self._result\n",
-    "    \n",
-    "    def __sub__(self, other):\n",
-    "        if not isinstance(self._result, (int, float)):\n",
-    "            raise TypeError(f\"Cannot subtract {type(self._result).__name__} objects\")\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result - other._result\n",
-    "        return self._result - other\n",
-    "    \n",
-    "    def __rsub__(self, other):\n",
-    "        if not isinstance(self._result, (int, float)):\n",
-    "            raise TypeError(f\"Cannot subtract {type(self._result).__name__} objects\")\n",
-    "        return other - self._result\n",
-    "    \n",
-    "    def __mul__(self, other):\n",
-    "        if not isinstance(self._result, (int, float)):\n",
-    "            raise TypeError(f\"Cannot multiply {type(self._result).__name__} objects\")\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result * other._result\n",
-    "        return self._result * other\n",
-    "    \n",
-    "    def __rmul__(self, other):\n",
-    "        if not isinstance(self._result, (int, float)):\n",
-    "            raise TypeError(f\"Cannot multiply {type(self._result).__name__} objects\")\n",
-    "        return other * self._result\n",
-    "    \n",
-    "    def __truediv__(self, other):\n",
-    "        if not isinstance(self._result, (int, float)):\n",
-    "            raise TypeError(f\"Cannot divide {type(self._result).__name__} objects\")\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result / other._result\n",
-    "        return self._result / other\n",
-    "    \n",
-    "    def __rtruediv__(self, other):\n",
-    "        if not isinstance(self._result, (int, float)):\n",
-    "            raise TypeError(f\"Cannot divide {type(self._result).__name__} objects\")\n",
-    "        return other / self._result\n",
-    "    \n",
-    "    # Comparison operations - work for all types with same-type comparisons\n",
-    "    def __eq__(self, other):\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result == other._result\n",
-    "        return self._result == other\n",
-    "    \n",
-    "    def __lt__(self, other):\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result < other._result\n",
-    "        return self._result < other\n",
-    "    \n",
-    "    def __le__(self, other):\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result <= other._result\n",
-    "        return self._result <= other\n",
-    "    \n",
-    "    def __gt__(self, other):\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result > other._result\n",
-    "        return self._result > other\n",
-    "    \n",
-    "    def __ge__(self, other):\n",
-    "        if isinstance(other, MetricResult):\n",
-    "            return self._result >= other._result\n",
-    "        return self._result >= other\n",
-    "    \n",
-    "    # Method forwarding for type-specific behaviors\n",
-    "    def __getattr__(self, name):\n",
-    "        \"\"\"Forward attribute access to the result object if it has that attribute.\n",
-    "        \n",
-    "        This allows calling string methods on discrete results, \n",
-    "        numeric methods on numeric results, and list methods on ranking results.\n",
-    "        \"\"\"\n",
-    "        if hasattr(self._result, name):\n",
-    "            attr = getattr(self._result, name)\n",
-    "            if callable(attr):\n",
-    "                # If it's a method, wrap it to return MetricResult when appropriate\n",
-    "                def wrapper(*args, **kwargs):\n",
-    "                    result = attr(*args, **kwargs)\n",
-    "                    # If the result is of the same type as self._result, wrap it\n",
-    "                    if isinstance(result, type(self._result)):\n",
-    "                        return MetricResult(result=result, reason=self.reason)\n",
-    "                    return result\n",
-    "                return wrapper\n",
-    "            return attr\n",
-    "        raise AttributeError(f\"{type(self).__name__} has no attribute '{name}'\")\n",
-    "    \n",
-    "    # JSON/dict serialization\n",
-    "    def to_dict(self):\n",
-    "        \"\"\"Convert the result to a dictionary.\"\"\"\n",
-    "        return {\n",
-    "            \"result\": self._result,\n",
-    "            \"reason\": self.reason\n",
-    "        }"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "490cdd2f",
-   "metadata": {},
-   "source": [
-    "### Example Usage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "24589401",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "42\n",
-      "This is a test\n",
-      "8.0\n",
-      "LOW\n",
-      "[2, 3]\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "\n",
-    "metric_result = MetricResult(result=42, reason=\"This is a test\")\n",
-    "print(metric_result)\n",
-    "print(metric_result.reason)\n",
-    "\n",
-    "### Example with Numeric Operations\n",
-    "num_result1 = MetricResult(result=5.0)\n",
-    "num_result2 = MetricResult(result=3.0)\n",
-    "print(num_result1 + num_result2)  # 8.0\n",
-    "\n",
-    "\n",
-    "### Example with String Operations\n",
-    "str_result = MetricResult(result=\"low\")\n",
-    "print(str_result.upper())  # \"LOW\"\n",
-    "\n",
-    "## Example with List Operations\n",
-    "list_result = MetricResult(result=[1, 2, 3])\n",
-    "print(list_result[1:])  # 2\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "06ce7a1d",
-   "metadata": {},
-   "source": [
-    "now lets make it `Pydantic` compatible also"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5d8fb818",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "from pydantic_core import core_schema\n",
-    "from pydantic import GetCoreSchemaHandler, ValidationInfo"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f4c288c0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "@patch(cls_method=True)\n",
-    "def validate(cls: MetricResult, value: t.Any, info: ValidationInfo):\n",
-    "    \"\"\"Provide compatibility with older Pydantic versions.\"\"\"\n",
-    "    if isinstance(value, MetricResult):\n",
-    "        return value\n",
-    "    return MetricResult(result=value)\n",
-    "\n",
-    "@patch\n",
-    "def __json__(self: MetricResult):\n",
-    "    \"\"\"Return data for JSON serialization.\n",
-    "    \n",
-    "    This method is used by json.dumps and other JSON serializers \n",
-    "    to convert MetricResult to a JSON-compatible format.\n",
-    "    \"\"\"\n",
-    "    return {\n",
-    "        \"result\": self._result,\n",
-    "        \"reason\": self.reason,\n",
-    "    }\n",
-    "\n",
-    "@patch(cls_method=True)\n",
-    "def __get_pydantic_core_schema__(\n",
-    "    cls: MetricResult, \n",
-    "    _source_type: t.Any, \n",
-    "    _handler: GetCoreSchemaHandler\n",
-    ") -> core_schema.CoreSchema:\n",
-    "    \"\"\"Generate a Pydantic core schema for MetricResult.\n",
-    "    \n",
-    "    This custom schema handles different serialization behaviors:\n",
-    "    - For model_dump(): Returns the original MetricResult instance\n",
-    "    - For model_dump_json(): Converts to a JSON-compatible dict using __json__\n",
-    "    \"\"\"\n",
-    "    def serializer_function(instance, info):\n",
-    "        \"\"\"Handle different serialization modes for MetricResult.\"\"\"\n",
-    "        # For JSON serialization (model_dump_json), use __json__ method\n",
-    "        if getattr(info, 'mode', None) == 'json':\n",
-    "            return instance.__json__()\n",
-    "        # For Python serialization (model_dump), return the instance itself\n",
-    "        return instance\n",
-    "    \n",
-    "    return core_schema.union_schema([\n",
-    "        # First schema: handles validation of MetricResult instances\n",
-    "        core_schema.is_instance_schema(MetricResult),\n",
-    "        \n",
-    "        # Second schema: handles validation of other values and conversion to MetricResult\n",
-    "        core_schema.chain_schema([\n",
-    "            core_schema.any_schema(),\n",
-    "            core_schema.no_info_plain_validator_function(\n",
-    "                lambda value: MetricResult(result=value) if not isinstance(value, MetricResult) else value\n",
-    "            ),\n",
-    "        ]),\n",
-    "    ], serialization=core_schema.plain_serializer_function_ser_schema(\n",
-    "        serializer_function,\n",
-    "        info_arg=True  # Explicitly specify that we're using the info argument\n",
-    "    ))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "f49739a6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from pydantic import BaseModel\n",
-    "\n",
-    "class TestModel(BaseModel):\n",
-    "    response: str\n",
-    "    grade: MetricResult\n",
-    "    faithfulness: MetricResult\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6ac6b955",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "TestModel(response='test', grade=1, faithfulness=1)"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "m = TestModel(response=\"test\", grade=MetricResult(result=1, reason=\"test\"), faithfulness=MetricResult(result=1, reason=\"test\"))\n",
-    "m"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4ffe750f",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'response': 'test', 'grade': 1, 'faithfulness': 1}"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "m.model_dump()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0bc2a1ec",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'{\"response\":\"test\",\"grade\":{\"result\":1,\"reason\":\"test\"},\"faithfulness\":{\"result\":1,\"reason\":\"test\"}}'"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "m.model_dump_json()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/experimental/old_nbs/api/model/pydantic_mode.ipynb b/experimental/old_nbs/api/model/pydantic_mode.ipynb
deleted file mode 100644
index 917d42011..000000000
--- a/experimental/old_nbs/api/model/pydantic_mode.ipynb
+++ /dev/null
@@ -1,269 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Ragas `BaseModel`\n",
-    "\n",
-    "> An Extended version of Pydantics `BaseModel` for some ragas specific stuff"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp model.pydantic_model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jjmachan/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "#| export\n",
-    "import typing as t\n",
-    "\n",
-    "from pydantic import BaseModel, PrivateAttr\n",
-    "\n",
-    "from ragas_experimental.typing import FieldMeta as RagasFieldMeta"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class ExtendedPydanticBaseModel(BaseModel):\n",
-    "    \"\"\"Extended Pydantic BaseModel with database integration capabilities\"\"\"\n",
-    "    \n",
-    "    # Private attribute for storing the database row_id\n",
-    "    _row_id: t.Optional[int] = PrivateAttr(default=None)\n",
-    "    \n",
-    "    # Class variable for storing column mapping overrides\n",
-    "    __column_mapping__: t.ClassVar[t.Dict[str, str]] = {}\n",
-    "    \n",
-    "    def __init__(self, **data):\n",
-    "        super().__init__(**data)\n",
-    "        # Initialize column mapping if not already defined\n",
-    "        if not self.__class__.__column_mapping__:\n",
-    "            self._initialize_column_mapping()\n",
-    "    \n",
-    "    @classmethod\n",
-    "    def _initialize_column_mapping(cls):\n",
-    "        \"\"\"Initialize mapping from field names to column IDs.\"\"\"\n",
-    "        for field_name, field_info in cls.model_fields.items():\n",
-    "            # Check if field has Column metadata (for Pydantic v2)\n",
-    "            column_id = None\n",
-    "            for extra in field_info.metadata or []:\n",
-    "                if isinstance(extra, RagasFieldMeta) and extra.id:\n",
-    "                    column_id = extra.id\n",
-    "                    break\n",
-    "            \n",
-    "            # If no Column metadata found, use field name as column ID\n",
-    "            if not column_id:\n",
-    "                column_id = field_name\n",
-    "            \n",
-    "            cls.__column_mapping__[field_name] = column_id\n",
-    "\n",
-    "            # check if the field is a MetricResult\n",
-    "            if cls._is_metric_result_field(field_info.annotation):\n",
-    "                # add additional mapping for the metric result\n",
-    "                reason_field_name = f\"{field_name}_reason\"\n",
-    "                reason_column_id = f\"{column_id}_reason\"\n",
-    "                cls.__column_mapping__[reason_field_name] = reason_column_id\n",
-    "\n",
-    "    @staticmethod\n",
-    "    def _is_metric_result_field(annotation):\n",
-    "        \"\"\"Check if a field annotation represents a MetricResult.\"\"\"\n",
-    "        # Direct import of MetricResult\n",
-    "        from ragas_experimental.metric.result import MetricResult\n",
-    "        \n",
-    "        # Check if annotation is or references MetricResult\n",
-    "        return (annotation is MetricResult or \n",
-    "                (hasattr(annotation, \"__origin__\") and annotation.__origin__ is MetricResult) or\n",
-    "                (hasattr(annotation, \"__class__\") and annotation.__class__ is MetricResult))\n",
-    "    \n",
-    "    @classmethod\n",
-    "    def get_column_id(cls, field_name: str) -> str:\n",
-    "        \"\"\"Get the column ID for a given field name.\"\"\"\n",
-    "        if field_name not in cls.__column_mapping__:\n",
-    "            raise ValueError(f\"No column mapping found for field {field_name}\")\n",
-    "        return cls.__column_mapping__[field_name]\n",
-    "    \n",
-    "    @classmethod\n",
-    "    def set_column_id(cls, field_name: str, column_id: str):\n",
-    "        \"\"\"Set the column ID for a given field name.\"\"\"\n",
-    "        if field_name not in cls.model_fields:\n",
-    "            raise ValueError(f\"Field {field_name} not found in model\")\n",
-    "        cls.__column_mapping__[field_name] = column_id\n",
-    "    \n",
-    "    def get_db_field_mapping(self) -> t.Dict[str, str]:\n",
-    "        \"\"\"Get a mapping from field names to column IDs for this model.\"\"\"\n",
-    "        return self.__class__.__column_mapping__\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import ragas_experimental.typing as rt\n",
-    "from ragas_experimental.metric.result import MetricResult"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Example usage\n",
-    "class TestDataRow(ExtendedPydanticBaseModel):\n",
-    "    id: t.Optional[int] = None\n",
-    "    query: t.Annotated[str, rt.Text(id=\"search_query\")]\n",
-    "    persona: t.List[t.Literal[\"opt1\", \"opt2\", \"opt3\"]]\n",
-    "    result: MetricResult"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "TestDataRow.__column_mapping__ = {}"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "TestDataRow(id=1, query='this is a test', persona=['opt1'], result=0.5)"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "test_data_row = TestDataRow(id=1, query=\"this is a test\", persona=[\"opt1\"], result=MetricResult(result=0.5, reason=\"test reason\"))\n",
-    "test_data_row"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "{'id': 'id',\n",
-       " 'query': 'search_query',\n",
-       " 'persona': 'persona',\n",
-       " 'result': 'result',\n",
-       " 'result_reason': 'result_reason'}"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "test_data_row.__column_mapping__"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Inheritance\n",
-    "class InheritedModel(TestDataRow):\n",
-    "    inherited_query: str\n",
-    "    query: str"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "inherited_model = InheritedModel(id=1, query=\"this is a test\", persona=[\"opt1\"], result=MetricResult(result=0.5, reason=\"test reason\"), inherited_query=\"this is a test\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "1"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "inherited_model.id"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/project/core.ipynb b/experimental/old_nbs/api/project/core.ipynb
deleted file mode 100644
index 31bd8f7fe..000000000
--- a/experimental/old_nbs/api/project/core.ipynb
+++ /dev/null
@@ -1,427 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Project\n",
-    "\n",
-    "> Use this class to represent the AI project that we are working on and to interact with datasets and experiments in it."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "673f70be",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp project.core"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "49c33521",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | hide\n",
-    "from nbdev.showdoc import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "585fc02a",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jjmachan/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "\n",
-    "from ragas_experimental.model.notion_model import NotionModel"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "8562b7cc",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "import typing as t\n",
-    "import os\n",
-    "import asyncio\n",
-    "\n",
-    "from fastcore.utils import patch\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "from ragas_experimental.backends.factory import RagasApiClientFactory\n",
-    "from ragas_experimental.backends.ragas_api_client import RagasApiClient\n",
-    "import ragas_experimental.typing as rt\n",
-    "from ragas_experimental.utils import async_to_sync, create_nano_id\n",
-    "from ragas_experimental.dataset import Dataset\n",
-    "from ragas_experimental.experiment import Experiment"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "8836d2bb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "class Project:\n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        project_id: str,\n",
-    "        backend: rt.SUPPORTED_BACKENDS = \"local\",\n",
-    "        root_dir: t.Optional[str] = None,\n",
-    "        ragas_api_client: t.Optional[RagasApiClient] = None,\n",
-    "    ):\n",
-    "        self.project_id = project_id\n",
-    "        self.backend = backend\n",
-    "        \n",
-    "        if backend == \"local\":\n",
-    "            if root_dir is None:\n",
-    "                raise ValueError(\"root_dir is required for local backend\")\n",
-    "            self._root_dir = os.path.join(root_dir, project_id)\n",
-    "            # Ensure project directory structure exists\n",
-    "            self._create_local_project_structure()\n",
-    "        elif backend == \"ragas_app\":\n",
-    "            if ragas_api_client is None:\n",
-    "                self._ragas_api_client = RagasApiClientFactory.create()\n",
-    "            else:\n",
-    "                self._ragas_api_client = ragas_api_client\n",
-    "        else:\n",
-    "            raise ValueError(f\"Invalid backend: {backend}\")\n",
-    "            \n",
-    "        # Initialize project properties\n",
-    "        if backend == \"ragas_app\":\n",
-    "            try:\n",
-    "                sync_version = async_to_sync(self._ragas_api_client.get_project)\n",
-    "                existing_project = sync_version(project_id=self.project_id)\n",
-    "                self.project_id = existing_project[\"id\"]\n",
-    "                self.name = existing_project[\"title\"]\n",
-    "                self.description = existing_project[\"description\"]\n",
-    "            except Exception as e:\n",
-    "                raise e\n",
-    "        elif backend == \"local\":\n",
-    "            self.name = self.project_id\n",
-    "            self.description = \"\"\n",
-    "    \n",
-    "    def _create_local_project_structure(self):\n",
-    "        \"\"\"Create the local directory structure for the project\"\"\"\n",
-    "        os.makedirs(self._root_dir, exist_ok=True)\n",
-    "        # Create datasets directory\n",
-    "        os.makedirs(os.path.join(self._root_dir, \"datasets\"), exist_ok=True)\n",
-    "        # Create experiments directory\n",
-    "        os.makedirs(os.path.join(self._root_dir, \"experiments\"), exist_ok=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "ca7aedc9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch(cls_method=True)\n",
-    "def create(\n",
-    "    cls: Project,\n",
-    "    name: str,\n",
-    "    description: str = \"\",\n",
-    "    backend: rt.SUPPORTED_BACKENDS = \"local\",\n",
-    "    root_dir: t.Optional[str] = None,\n",
-    "    ragas_api_client: t.Optional[RagasApiClient] = None,\n",
-    "):\n",
-    "    if backend == \"ragas_app\":\n",
-    "        ragas_api_client = ragas_api_client or RagasApiClientFactory.create()\n",
-    "        sync_version = async_to_sync(ragas_api_client.create_project)\n",
-    "        new_project = sync_version(title=name, description=description)\n",
-    "        return cls(new_project[\"id\"], backend=\"ragas_api\", ragas_api_client=ragas_api_client)\n",
-    "    elif backend == \"local\":\n",
-    "        if root_dir is None:\n",
-    "            raise ValueError(\"root_dir is required for local backend\")\n",
-    "        # For local backend, we use the name as the project_id\n",
-    "        project_id = name\n",
-    "        return cls(project_id, backend=\"local\", root_dir=root_dir)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "156e8b05",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from tempfile import TemporaryDirectory"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "755b526f",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# test local backend\n",
-    "with TemporaryDirectory() as temp_dir:\n",
-    "    local_project = Project.create(\n",
-    "        name=\"test_local_project\",\n",
-    "        description=\"A test project using local backend\",\n",
-    "        backend=\"local\",\n",
-    "        root_dir=temp_dir\n",
-    "    )\n",
-    "    \n",
-    "    # assert folder exists\n",
-    "    assert os.path.exists(os.path.join(temp_dir, \"test_local_project\"))\n",
-    "    assert os.path.exists(os.path.join(temp_dir, \"test_local_project\", \"datasets\"))\n",
-    "    assert os.path.exists(os.path.join(temp_dir, \"test_local_project\", \"experiments\"))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "8ddcbb8a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch\n",
-    "def delete(self: Project):\n",
-    "    if self.backend == \"ragas_app\":\n",
-    "        sync_version = async_to_sync(self._ragas_api_client.delete_project)\n",
-    "        sync_version(project_id=self.project_id)\n",
-    "        print(\"Project deleted from Ragas API!\")\n",
-    "    elif self.backend == \"local\":\n",
-    "        import shutil\n",
-    "        # Caution: this deletes the entire project directory\n",
-    "        if os.path.exists(self._root_dir):\n",
-    "            shutil.rmtree(self._root_dir)\n",
-    "            print(f\"Local project at {self._root_dir} deleted!\")\n",
-    "        else:\n",
-    "            print(f\"Local project at {self._root_dir} does not exist\")\n",
-    "\n",
-    "    @patch\n",
-    "    def __repr__(self: Project):\n",
-    "        return f\"Project(name='{self.name}', backend='{self.backend}')\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "c2b77dc7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Local project at /var/folders/2y/02fp70k56p75ldrkgtx7z10r0000gn/T/tmpmjfsgyh6/test_local_project deleted!\n"
-     ]
-    }
-   ],
-   "source": [
-    "# test delete\n",
-    "with TemporaryDirectory() as temp_dir:\n",
-    "    local_project = Project.create(\n",
-    "        name=\"test_local_project\",\n",
-    "        description=\"A test project using local backend\",\n",
-    "        backend=\"local\",\n",
-    "        root_dir=temp_dir\n",
-    "    )\n",
-    "    assert os.path.exists(os.path.join(temp_dir, \"test_local_project\"))\n",
-    "    local_project.delete()\n",
-    "    assert not os.path.exists(os.path.join(temp_dir, \"test_local_project\"))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "0d2f1041",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch(cls_method=True)\n",
-    "def get(\n",
-    "    cls: Project, \n",
-    "    name: str, \n",
-    "    backend: rt.SUPPORTED_BACKENDS = \"local\",\n",
-    "    root_dir: t.Optional[str] = None,\n",
-    "    ragas_api_client: t.Optional[RagasApiClient] = None,\n",
-    ") -> Project:\n",
-    "    \"\"\"Get an existing project by name.\n",
-    "    \n",
-    "    Args:\n",
-    "        name: The name of the project to get\n",
-    "        backend: The backend to use (ragas_api or local)\n",
-    "        root_dir: The root directory for local backends\n",
-    "        ragas_api_client: Optional custom Ragas API client\n",
-    "        \n",
-    "    Returns:\n",
-    "        Project: The project instance\n",
-    "    \"\"\"\n",
-    "    if backend == \"ragas_app\":\n",
-    "        # Search for project with given name in Ragas API\n",
-    "        if ragas_api_client is None:\n",
-    "            ragas_api_client = RagasApiClientFactory.create()\n",
-    "\n",
-    "        # get the project by name\n",
-    "        sync_version = async_to_sync(ragas_api_client.get_project_by_name)\n",
-    "        project_info = sync_version(project_name=name)\n",
-    "\n",
-    "        # Return Project instance\n",
-    "        return Project(\n",
-    "            project_id=project_info[\"id\"],\n",
-    "            backend=\"ragas_app\",\n",
-    "            ragas_api_client=ragas_api_client,\n",
-    "        )\n",
-    "    elif backend == \"local\":\n",
-    "        if root_dir is None:\n",
-    "            raise ValueError(\"root_dir is required for local backend\")\n",
-    "            \n",
-    "        # For local backend, check if project directory exists\n",
-    "        project_path = os.path.join(root_dir, name)\n",
-    "        if not os.path.exists(project_path):\n",
-    "            raise ValueError(f\"Local project '{name}' does not exist at {project_path}\")\n",
-    "            \n",
-    "        # Return Project instance\n",
-    "        return Project(\n",
-    "            project_id=name,\n",
-    "            backend=\"local\",\n",
-    "            root_dir=root_dir,\n",
-    "        )\n",
-    "    else:\n",
-    "        raise ValueError(f\"Invalid backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ea1a2fe9",
-   "metadata": {},
-   "source": [
-    "additional utils functions to get dataset and experiment path."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "d5994592",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch\n",
-    "def get_dataset_path(self: Project, dataset_name: str) -> str:\n",
-    "    \"\"\"Get the path to a dataset file in the local backend\"\"\"\n",
-    "    if self.backend != \"local\":\n",
-    "        raise ValueError(\"This method is only available for local backend\")\n",
-    "    return os.path.join(self._root_dir, \"datasets\", f\"{dataset_name}.csv\")\n",
-    "\n",
-    "@patch\n",
-    "def get_experiment_path(self: Project, experiment_name: str) -> str:\n",
-    "    \"\"\"Get the path to an experiment file in the local backend\"\"\"\n",
-    "    if self.backend != \"local\":\n",
-    "        raise ValueError(\"This method is only available for local backend\")\n",
-    "    return os.path.join(self._root_dir, \"experiments\", f\"{experiment_name}.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "591f0632",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Created project: Project(name='test_local_project', backend='local')\n",
-      "Project directory exists: True\n",
-      "Datasets directory exists: True\n",
-      "Experiments directory exists: True\n",
-      "Retrieved project: Project(name='test_local_project', backend='local')\n",
-      "Dataset path: /var/folders/2y/02fp70k56p75ldrkgtx7z10r0000gn/T/tmpzi2vmteq/test_local_project/datasets/example_dataset.csv\n",
-      "Experiment path: /var/folders/2y/02fp70k56p75ldrkgtx7z10r0000gn/T/tmpzi2vmteq/test_local_project/experiments/example_experiment.csv\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Example of using the local backend\n",
-    "import tempfile\n",
-    "import os\n",
-    "\n",
-    "# Create a temporary directory for demonstration\n",
-    "with tempfile.TemporaryDirectory() as temp_dir:\n",
-    "    # Create a new project with local backend\n",
-    "    local_project = Project.create(\n",
-    "        name=\"test_local_project\",\n",
-    "        description=\"A test project using local backend\",\n",
-    "        backend=\"local\",\n",
-    "        root_dir=temp_dir\n",
-    "    )\n",
-    "    \n",
-    "    print(f\"Created project: {local_project}\")\n",
-    "    \n",
-    "    # Check the directory structure\n",
-    "    project_path = os.path.join(temp_dir, \"test_local_project\")\n",
-    "    print(f\"Project directory exists: {os.path.exists(project_path)}\")\n",
-    "    print(f\"Datasets directory exists: {os.path.exists(os.path.join(project_path, 'datasets'))}\")\n",
-    "    print(f\"Experiments directory exists: {os.path.exists(os.path.join(project_path, 'experiments'))}\")\n",
-    "    \n",
-    "    # Get the project\n",
-    "    retrieved_project = Project.get(\n",
-    "        name=\"test_local_project\",\n",
-    "        backend=\"local\",\n",
-    "        root_dir=temp_dir\n",
-    "    )\n",
-    "    \n",
-    "    print(f\"Retrieved project: {retrieved_project}\")\n",
-    "    \n",
-    "    # Get paths for datasets and experiments\n",
-    "    dataset_path = local_project.get_dataset_path(\"example_dataset\")\n",
-    "    experiment_path = local_project.get_experiment_path(\"example_experiment\")\n",
-    "    \n",
-    "    print(f\"Dataset path: {dataset_path}\")\n",
-    "    print(f\"Experiment path: {experiment_path}\")\n",
-    "    \n",
-    "    # Delete the project (not needed when using with tempfile)\n",
-    "    # local_project.delete()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/experimental/old_nbs/api/project/core.md b/experimental/old_nbs/api/project/core.md
deleted file mode 100644
index 0835cefc9..000000000
--- a/experimental/old_nbs/api/project/core.md
+++ /dev/null
@@ -1,148 +0,0 @@
----
-jupyter:
-  jupytext:
-    text_representation:
-      extension: .md
-      format_name: markdown
-      format_version: '1.3'
-      jupytext_version: 1.17.1
-  kernelspec:
-    display_name: .venv
-    language: python
-    name: python3
----
-
-# Project
-
-> Use this class to represent the AI project that we are working on and to interact with datasets and experiments in it.
-
-```python
-# | default_exp project.core
-```
-
-```python
-# | hide
-from nbdev.showdoc import *
-```
-
-```python
-
-from ragas_experimental.model.notion_model import NotionModel
-```
-
-```python
-# | export
-import typing as t
-import os
-import asyncio
-
-from fastcore.utils import patch
-from pydantic import BaseModel
-
-from ragas_experimental.backends.factory import RagasApiClientFactory
-from ragas_experimental.backends.ragas_api_client import RagasApiClient
-import ragas_experimental.typing as rt
-from ragas_experimental.utils import async_to_sync, create_nano_id
-from ragas_experimental.dataset import Dataset
-from ragas_experimental.experiment import Experiment
-```
-
-```python
-# | export
-class Project:
-    def __init__(
-        self,
-        project_id: str,
-        backend: t.Literal["ragas_api", "local"] = "local",
-        root_dir: t.Optional[str] = None,
-        ragas_api_client: t.Optional[RagasApiClient] = None,
-    ):
-        self.project_id = project_id
-        if backend == "local":
-            self._root_dir = root_dir
-        elif backend == "ragas_api":
-            if ragas_api_client is None:
-                self._ragas_api_client = RagasApiClientFactory.create()
-            else:
-                self._ragas_api_client = ragas_api_client
-        else:
-            raise ValueError(f"Invalid backend: {backend}")
-        # create the project
-        if backend == "ragas_api":
-            try:
-                sync_version = async_to_sync(self._ragas_api_client.get_project)
-                existing_project = sync_version(project_id=self.project_id)
-                self.project_id = existing_project["id"]
-                self.name = existing_project["title"]
-                self.description = existing_project["description"]
-            except Exception as e:
-                raise e
-        elif backend == "local":
-            self.name = self.project_id
-            # create a new folder in the root_dir/project_id
-            self._root_dir.mkdir(parents=True, exist_ok=True)
-
-    @classmethod
-    def create(
-        cls,
-        name: str,
-        description: str = "",
-        ragas_api_client: t.Optional[RagasApiClient] = None,
-    ):
-        ragas_api_client = RagasApiClientFactory.create()
-        sync_version = async_to_sync(ragas_api_client.create_project)
-        new_project = sync_version(title=name, description=description)
-        return cls(new_project["id"], ragas_api_client)
-
-    def delete(self):
-        sync_version = async_to_sync(self._ragas_api_client.delete_project)
-        sync_version(project_id=self.project_id)
-        print("Project deleted!")
-
-    def __repr__(self):
-        return f"Project(name='{self.name}')"
-```
-
-```python
-RAGAS_APP_TOKEN = "api-key"
-RAGAS_API_BASE_URL = "https://api.dev.app.ragas.io"
-
-os.environ["RAGAS_APP_TOKEN"] = RAGAS_APP_TOKEN
-os.environ["RAGAS_API_BASE_URL"] = RAGAS_API_BASE_URL
-```
-
-```python
-#project = Project.create("Demo Project")
-project = Project(project_id="1ef0843b-231f-4a2c-b64d-d39bcee9d830")
-project
-```
-
-```python
-# | export
-@patch(cls_method=True)
-def get(cls: Project, name: str, ragas_api_client: t.Optional[RagasApiClient] = None) -> Project:
-    """Get an existing project by name."""
-    # Search for project with given name
-    if ragas_api_client is None:
-        ragas_api_client = RagasApiClientFactory.create()
-
-    # get the project by name
-    sync_version = async_to_sync(ragas_api_client.get_project_by_name)
-    project_info = sync_version(
-        project_name=name
-    )
-
-    # Return Project instance
-    return Project(
-        project_id=project_info["id"],
-        ragas_api_client=ragas_api_client,
-    )
-```
-
-```python
-Project.get("SuperMe")
-```
-
-```python
-#project.delete()
-```
diff --git a/experimental/old_nbs/api/project/datasets.ipynb b/experimental/old_nbs/api/project/datasets.ipynb
deleted file mode 100644
index 120170e21..000000000
--- a/experimental/old_nbs/api/project/datasets.ipynb
+++ /dev/null
@@ -1,1084 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "b89e33f6",
-   "metadata": {},
-   "source": [
-    "# Dataset Management\n",
-    "\n",
-    "> Methods to create and manage datasets within projects"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "6ea028c4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp project.datasets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "6842baad",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | hide\n",
-    "from nbdev.showdoc import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "83f0eee7",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jjmachan/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "# | export\n",
-    "import typing as t\n",
-    "import os\n",
-    "import asyncio\n",
-    "import tempfile\n",
-    "\n",
-    "from fastcore.utils import patch\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "from ragas_experimental.project.core import Project\n",
-    "from ragas_experimental.typing import SUPPORTED_BACKENDS\n",
-    "from ragas_experimental.backends.factory import RagasApiClientFactory\n",
-    "from ragas_experimental.backends.ragas_api_client import RagasApiClient\n",
-    "import ragas_experimental.typing as rt\n",
-    "from ragas_experimental.utils import async_to_sync, create_nano_id\n",
-    "from ragas_experimental.dataset import Dataset\n",
-    "from ragas_experimental.utils import get_test_directory"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "381d6909",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "async def create_dataset_columns(project_id, dataset_id, columns, create_dataset_column_func):\n",
-    "    tasks = []\n",
-    "    for column in columns:\n",
-    "        tasks.append(create_dataset_column_func(\n",
-    "            project_id=project_id,\n",
-    "            dataset_id=dataset_id,\n",
-    "            id=create_nano_id(),\n",
-    "            name=column[\"name\"],\n",
-    "            type=column[\"type\"],\n",
-    "            settings=column[\"settings\"],\n",
-    "        ))\n",
-    "    return await asyncio.gather(*tasks)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "f0a1a475",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "def get_dataset_from_ragas_app(\n",
-    "    self: Project, \n",
-    "    name: str, \n",
-    "    model: t.Type[BaseModel]\n",
-    ") -> Dataset:\n",
-    "    \"\"\"Create a dataset in the Ragas App backend.\"\"\"\n",
-    "    # create the dataset\n",
-    "    sync_version = async_to_sync(self._ragas_api_client.create_dataset)\n",
-    "    dataset_info = sync_version(\n",
-    "        project_id=self.project_id,\n",
-    "        name=name if name is not None else model.__name__,\n",
-    "    )\n",
-    "\n",
-    "    # create the columns for the dataset\n",
-    "    column_types = rt.ModelConverter.model_to_columns(model)\n",
-    "    sync_version = async_to_sync(create_dataset_columns)\n",
-    "    sync_version(\n",
-    "        project_id=self.project_id,\n",
-    "        dataset_id=dataset_info[\"id\"],\n",
-    "        columns=column_types,\n",
-    "        create_dataset_column_func=self._ragas_api_client.create_dataset_column,\n",
-    "    )\n",
-    "        \n",
-    "    # Return a new Dataset instance\n",
-    "    return Dataset(\n",
-    "        name=name if name is not None else model.__name__,\n",
-    "        model=model,\n",
-    "        datatable_type=\"datasets\",\n",
-    "        project_id=self.project_id,\n",
-    "        dataset_id=dataset_info[\"id\"],\n",
-    "        ragas_api_client=self._ragas_api_client,\n",
-    "        backend=\"ragas_app\"\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "aed39788",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "def get_dataset_from_local(\n",
-    "    self: Project,\n",
-    "    name: str,\n",
-    "    model: t.Type[BaseModel]\n",
-    ") -> Dataset:\n",
-    "    \"\"\"Create a dataset in the local filesystem backend.\n",
-    "    \n",
-    "    Args:\n",
-    "        name: Name of the dataset\n",
-    "        model: Pydantic model defining the structure\n",
-    "        \n",
-    "    Returns:\n",
-    "        Dataset: A new dataset configured to use the local backend\n",
-    "    \"\"\"\n",
-    "    # Use a UUID as the dataset ID\n",
-    "    dataset_id = create_nano_id()\n",
-    "    \n",
-    "    # Return a new Dataset instance with local backend\n",
-    "    return Dataset(\n",
-    "        name=name if name is not None else model.__name__,\n",
-    "        model=model,\n",
-    "        datatable_type=\"datasets\",\n",
-    "        project_id=self.project_id,\n",
-    "        dataset_id=dataset_id,\n",
-    "        backend=\"local\",\n",
-    "        local_root_dir=os.path.dirname(self._root_dir)  # Root dir for all projects\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "ae500be8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def create_dataset(\n",
-    "    self: Project, \n",
-    "    model: t.Type[BaseModel], \n",
-    "    name: t.Optional[str] = None,\n",
-    "    backend: t.Optional[SUPPORTED_BACKENDS] = None\n",
-    ") -> Dataset:\n",
-    "    \"\"\"Create a new dataset.\n",
-    "\n",
-    "    Args:\n",
-    "        model: Model class defining the dataset structure\n",
-    "        name: Name of the dataset (defaults to model name if not provided)\n",
-    "        backend: The backend to use (defaults to project's backend if not specified)\n",
-    "\n",
-    "    Returns:\n",
-    "        Dataset: A new dataset object for managing entries\n",
-    "    \"\"\"\n",
-    "    # If name is not provided, use the model name\n",
-    "    if name is None:\n",
-    "        name = model.__name__\n",
-    "        \n",
-    "    # If backend is not specified, use the project's backend\n",
-    "    if backend is None:\n",
-    "        backend = self.backend\n",
-    "\n",
-    "    # Create dataset using the appropriate backend\n",
-    "    if backend == \"local\":\n",
-    "        return get_dataset_from_local(self, name, model)\n",
-    "    elif backend == \"ragas_app\":\n",
-    "        return get_dataset_from_ragas_app(self, name, model)\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "2c166d04",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "CSV file exists: True\n",
-      "CSV content:\n",
-      "_row_id,id,name\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "import tempfile\n",
-    "from pydantic import BaseModel\n",
-    "from ragas_experimental import Project\n",
-    "\n",
-    "# Create a test directory\n",
-    "with tempfile.TemporaryDirectory() as temp_dir:\n",
-    "    # Create a project\n",
-    "    project = Project.create(\n",
-    "        name=\"test_project\",\n",
-    "        description=\"Test project\",\n",
-    "        backend=\"local\",\n",
-    "        root_dir=temp_dir\n",
-    "    )\n",
-    "    \n",
-    "    # Define a test model\n",
-    "    class TestModel(BaseModel):\n",
-    "        id: int\n",
-    "        name: str\n",
-    "    \n",
-    "    # Create a dataset\n",
-    "    dataset = project.create_dataset(\n",
-    "        model=TestModel,\n",
-    "        name=\"test_dataset\"\n",
-    "    )\n",
-    "    \n",
-    "    # Check if CSV file exists\n",
-    "    csv_path = os.path.join(temp_dir, \"test_project\", \"datasets\", \"test_dataset.csv\")\n",
-    "    print(f\"CSV file exists: {os.path.exists(csv_path)}\")\n",
-    "    \n",
-    "    # Read CSV content\n",
-    "    if os.path.exists(csv_path):\n",
-    "        with open(csv_path, 'r') as f:\n",
-    "            print(\"CSV content:\")\n",
-    "            print(f.read())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "0776c897",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental.metric import MetricResult\n",
-    "from ragas_experimental import Project\n",
-    "from ragas_experimental.utils import get_test_directory\n",
-    "\n",
-    "from fastcore.test import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "9d851ffd",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tmp_root_dir = get_test_directory()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "34a5adfa",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# test model\n",
-    "class DatasetModel(BaseModel):\n",
-    "    id: int\n",
-    "    name: str\n",
-    "    description: str\n",
-    "\n",
-    "class ExperimentModel(DatasetModel):\n",
-    "    tags: t.Literal[\"tag1\", \"tag2\", \"tag3\"]\n",
-    "    result: MetricResult"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "id": "a50e3d42",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "p = Project.create(name=\"test_project\", backend=\"local\", root_dir=tmp_root_dir)\n",
-    "dataset_with_dataset_model = p.create_dataset(name=\"dataset_with_dataset_model\", model=DatasetModel)\n",
-    "dataset_with_experiment_model = p.create_dataset(name=\"dataset_with_experiment_model\", model=ExperimentModel)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "id": "16972bbf",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['dataset_with_dataset_model.csv', 'dataset_with_experiment_model.csv']"
-      ]
-     },
-     "execution_count": 13,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "os.listdir(os.path.join(tmp_root_dir, \"test_project\", \"datasets\"))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "03b0be74",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "LocalBackend(local_root_dir=/var/folders/2y/02fp70k56p75ldrkgtx7z10r0000gn/T/ragas_test_dcqQZIcawxpX, project_id=test_project, dataset_id=nckelCD21MKD, dataset_name=dataset_with_dataset_model)"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset_with_dataset_model._backend"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "id": "b19996ad",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_eq(os.path.exists(\n",
-    "    os.path.join(tmp_root_dir, \"test_project\", \"datasets\", f'{dataset_with_dataset_model.name}.csv')\n",
-    "), True)\n",
-    "\n",
-    "test_eq(os.path.exists(\n",
-    "    os.path.join(tmp_root_dir, \"test_project\", \"datasets\", f'{dataset_with_experiment_model.name}.csv')\n",
-    "), True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "id": "d402bb6a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def get_dataset_by_id(\n",
-    "    self: Project, \n",
-    "    dataset_id: str, \n",
-    "    model: t.Type[BaseModel],\n",
-    "    backend: t.Optional[SUPPORTED_BACKENDS] = None\n",
-    ") -> Dataset:\n",
-    "    \"\"\"Get an existing dataset by ID.\n",
-    "    \n",
-    "    Args:\n",
-    "        dataset_id: The ID of the dataset to retrieve\n",
-    "        model: The model class to use for the dataset entries\n",
-    "        backend: The backend to use (defaults to project's backend)\n",
-    "        \n",
-    "    Returns:\n",
-    "        Dataset: The retrieved dataset\n",
-    "    \"\"\"\n",
-    "    # If backend is not specified, use the project's backend\n",
-    "    if backend is None:\n",
-    "        backend = self.backend\n",
-    "        \n",
-    "    if backend == \"ragas_app\":\n",
-    "        # Search for database with given ID\n",
-    "        sync_version = async_to_sync(self._ragas_api_client.get_dataset)\n",
-    "        dataset_info = sync_version(\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_id=dataset_id\n",
-    "        )\n",
-    "\n",
-    "        # For now, return Dataset without model type\n",
-    "        return Dataset(\n",
-    "            name=dataset_info[\"name\"],\n",
-    "            model=model,\n",
-    "            datatable_type=\"datasets\",\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_id=dataset_id,\n",
-    "            ragas_api_client=self._ragas_api_client,\n",
-    "            backend=\"ragas_app\"\n",
-    "        )\n",
-    "    elif backend == \"local\":\n",
-    "        # For local backend, this is not a typical operation since we use names\n",
-    "        # We could maintain a mapping of IDs to names, but for now just raise an error\n",
-    "        raise NotImplementedError(\n",
-    "            \"get_dataset_by_id is not implemented for local backend. \"\n",
-    "            \"Use get_dataset with the dataset name instead.\"\n",
-    "        )\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "id": "53688362",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def get_dataset(\n",
-    "    self: Project, \n",
-    "    dataset_name: str, \n",
-    "    model: t.Type[BaseModel],\n",
-    "    backend: t.Optional[SUPPORTED_BACKENDS] = None\n",
-    ") -> Dataset:\n",
-    "    \"\"\"Get an existing dataset by name.\n",
-    "    \n",
-    "    Args:\n",
-    "        dataset_name: The name of the dataset to retrieve\n",
-    "        model: The model class to use for the dataset entries\n",
-    "        backend: The backend to use (defaults to project's backend if not specified)\n",
-    "        \n",
-    "    Returns:\n",
-    "        Dataset: The retrieved dataset\n",
-    "    \"\"\"\n",
-    "    # If backend is not specified, use the project's backend\n",
-    "    if backend is None:\n",
-    "        backend = self.backend\n",
-    "        \n",
-    "    if backend == \"ragas_app\":\n",
-    "        # Search for dataset with given name\n",
-    "        sync_version = async_to_sync(self._ragas_api_client.get_dataset_by_name)\n",
-    "        dataset_info = sync_version(\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_name=dataset_name\n",
-    "        )\n",
-    "\n",
-    "        # Return Dataset instance\n",
-    "        return Dataset(\n",
-    "            name=dataset_info[\"name\"],\n",
-    "            model=model,\n",
-    "            datatable_type=\"datasets\",\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_id=dataset_info[\"id\"],\n",
-    "            ragas_api_client=self._ragas_api_client,\n",
-    "            backend=\"ragas_app\"\n",
-    "        )\n",
-    "    elif backend == \"local\":\n",
-    "        # Check if the dataset file exists\n",
-    "        dataset_path = self.get_dataset_path(dataset_name)\n",
-    "        if not os.path.exists(dataset_path):\n",
-    "            raise ValueError(f\"Dataset '{dataset_name}' does not exist\")\n",
-    "            \n",
-    "        # Create dataset instance with a random ID\n",
-    "        dataset_id = create_nano_id()\n",
-    "        \n",
-    "        # Return Dataset instance\n",
-    "        return Dataset(\n",
-    "            name=dataset_name,\n",
-    "            model=model,\n",
-    "            datatable_type=\"datasets\",\n",
-    "            project_id=self.project_id,\n",
-    "            dataset_id=dataset_id,\n",
-    "            backend=\"local\",\n",
-    "            local_root_dir=os.path.dirname(self._root_dir)  # Root dir for all projects\n",
-    "        )\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "id": "07d859b8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def list_dataset_names(\n",
-    "    self: Project,\n",
-    "    backend: t.Optional[SUPPORTED_BACKENDS] = None\n",
-    ") -> t.List[str]:\n",
-    "    \"\"\"List all datasets in the project.\n",
-    "    \n",
-    "    Args:\n",
-    "        backend: The backend to use (defaults to project's backend)\n",
-    "        \n",
-    "    Returns:\n",
-    "        List[str]: Names of all datasets in the project\n",
-    "    \"\"\"\n",
-    "    # If backend is not specified, use the project's backend\n",
-    "    if backend is None:\n",
-    "        backend = self.backend\n",
-    "        \n",
-    "    if backend == \"ragas_app\":\n",
-    "        # Get all datasets from API\n",
-    "        sync_version = async_to_sync(self._ragas_api_client.list_datasets)\n",
-    "        datasets = sync_version(project_id=self.project_id)\n",
-    "        return [dataset[\"name\"] for dataset in datasets]\n",
-    "    elif backend == \"local\":\n",
-    "        # Get all CSV files in the datasets directory\n",
-    "        datasets_dir = os.path.join(self._root_dir, \"datasets\")\n",
-    "        if not os.path.exists(datasets_dir):\n",
-    "            return []\n",
-    "            \n",
-    "        return [\n",
-    "            os.path.splitext(f)[0] for f in os.listdir(datasets_dir)\n",
-    "            if f.endswith('.csv')\n",
-    "        ]\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "id": "93ed3cf9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Define a test model for demonstration\n",
-    "class TestModel(BaseModel):\n",
-    "    id: int\n",
-    "    name: str\n",
-    "    description: str\n",
-    "    tags: t.Literal[\"tag1\", \"tag2\", \"tag3\"]\n",
-    "    tags_color_coded: t.Annotated[t.Literal[\"red\", \"green\", \"blue\"], rt.Select(colors=[\"red\", \"green\", \"blue\"])]\n",
-    "    url: t.Annotated[str, rt.Url()] = \"https://www.google.com\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "55f47f48",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "_row_id,id,name,description,score\n",
-      "bd7d69f5-8836-4e79-a191-b466dc9c4818,0,Test Item 0,Description for item 0,0.0\n",
-      "70d04a30-9697-4f41-a7d0-62e7c733ccdd,1,Test Item 1,Description for item 1,0.5\n",
-      "a9ca1712-51cb-4220-a050-6d1df060d2d1,2,Test Item 2,Description for item 2,1.0\n",
-      "\n",
-      "Retrieved dataset: Dataset(name='test_dataset_comprehensive', model=LocalTestModel, len=3)\n",
-      "Updated entry: id=1 name='Updated Name' description='Description for item 1' score=9.9\n",
-      "\n",
-      "DataFrame (first 2 rows):\n",
-      "   id          name             description  score\n",
-      "0   0   Test Item 0  Description for item 0    0.0\n",
-      "1   1  Updated Name  Description for item 1    9.9\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Use a persistent test directory \n",
-    "test_dir = get_test_directory()\n",
-    "\n",
-    "# Create a new project with local backend\n",
-    "local_project = Project.create(\n",
-    "    name=\"test_local_project_comprehensive\",\n",
-    "    description=\"A test project using local backend\",\n",
-    "    backend=\"local\",\n",
-    "    root_dir=test_dir\n",
-    ")\n",
-    "\n",
-    "# Define a test model\n",
-    "class LocalTestModel(BaseModel):\n",
-    "    id: int\n",
-    "    name: str\n",
-    "    description: str\n",
-    "    score: float\n",
-    "\n",
-    "# Create a dataset with local backend\n",
-    "local_dataset = local_project.create_dataset(\n",
-    "    model=LocalTestModel,\n",
-    "    name=\"test_dataset_comprehensive\"\n",
-    ")\n",
-    "\n",
-    "# Add some entries\n",
-    "for i in range(3):\n",
-    "    entry = LocalTestModel(\n",
-    "        id=i,\n",
-    "        name=f\"Test Item {i}\",\n",
-    "        description=f\"Description for item {i}\",\n",
-    "        score=i * 0.5\n",
-    "    )\n",
-    "    local_dataset.append(entry)\n",
-    "\n",
-    "# Check the dataset\n",
-    "#print(f\"Dataset after adding entries: {local_dataset}\")\n",
-    "test_eq(len(local_dataset), 3)\n",
-    "\n",
-    "# Get the dataset path\n",
-    "dataset_path = local_project.get_dataset_path(local_dataset.name)\n",
-    "#print(f\"Dataset file path: {dataset_path}\")\n",
-    "test_eq(\n",
-    "    os.path.join(test_dir, local_project.name, 'datasets', f'{local_dataset.name}.csv'),\n",
-    "    dataset_path\n",
-    ")\n",
-    "\n",
-    "# open and print raw csv file\n",
-    "with open(os.path.join(test_dir, local_project.name, 'datasets', f'{local_dataset.name}.csv')) as f:\n",
-    "    print(f.read())\n",
-    "\n",
-    "# Get the dataset by name\n",
-    "retrieved_dataset = local_project.get_dataset(\n",
-    "    dataset_name=\"test_dataset_comprehensive\",\n",
-    "    model=LocalTestModel\n",
-    ")\n",
-    "        \n",
-    "# Load entries\n",
-    "retrieved_dataset.load()\n",
-    "print(f\"Retrieved dataset: {retrieved_dataset}\")\n",
-    "\n",
-    "# Modify an entry\n",
-    "entry = retrieved_dataset[1]  \n",
-    "entry.name = \"Updated Name\"\n",
-    "entry.score = 9.9\n",
-    "retrieved_dataset.save(entry)\n",
-    "\n",
-    "# Load again to verify changes\n",
-    "retrieved_dataset.load()\n",
-    "print(f\"Updated entry: {retrieved_dataset[1]}\")\n",
-    "\n",
-    "# Convert to DataFrame\n",
-    "df = retrieved_dataset.to_pandas()\n",
-    "print(\"\\nDataFrame (first 2 rows):\")\n",
-    "print(df.head(2))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "id": "a55b1028",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'\\nimport os\\nfrom pydantic import BaseModel\\n\\n# Set environment variables for API access\\nRAGAS_APP_TOKEN = \"your-api-key\"\\nRAGAS_API_BASE_URL = \"https://api.dev.app.ragas.io\"\\nos.environ[\"RAGAS_APP_TOKEN\"] = RAGAS_APP_TOKEN\\nos.environ[\"RAGAS_API_BASE_URL\"] = RAGAS_API_BASE_URL\\n\\n# Get a project from the Ragas API\\nragas_app_project = Project.get(\\n    name=\"Your Project Name\",\\n    backend=\"ragas_app\"\\n)\\n\\n# Define a test model\\nclass ApiTestModel(BaseModel):\\n    id: int\\n    name: str\\n    description: str\\n    score: float\\n\\n# Create a dataset with ragas_app backend\\napi_dataset = ragas_app_project.create_dataset(\\n    model=ApiTestModel,\\n    name=\"api_test_dataset\",\\n    backend=\"ragas_app\"\\n)\\n\\n# Add some entries\\nfor i in range(3):\\n    entry = ApiTestModel(\\n        id=i,\\n        name=f\"API Test Item {i}\",\\n        description=f\"Description for API item {i}\",\\n        score=i * 1.1\\n    )\\n    api_dataset.append(entry)\\n\\n# List all datasets in the project\\ndataset_names = ragas_app_project.list_dataset_names(backend=\"ragas_app\")\\nprint(f\"Datasets in project: {dataset_names}\")\\n\\n# Get the dataset by name\\nretrieved_dataset = ragas_app_project.get_dataset(\\n    dataset_name=\"api_test_dataset\",\\n    model=ApiTestModel,\\n    backend=\"ragas_app\"\\n)\\n\\n# Load entries\\nretrieved_dataset.load()\\nprint(f\"Retrieved dataset: {retrieved_dataset}\")\\n\\n# View as DataFrame\\ndf = retrieved_dataset.to_pandas()\\nprint(\"\\nDataFrame:\")\\nprint(df)\\n'"
-      ]
-     },
-     "execution_count": 21,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Example of using ragas_app backend (commented out since it requires API access)\n",
-    "'''\n",
-    "import os\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "# Set environment variables for API access\n",
-    "RAGAS_APP_TOKEN = \"your-api-key\"\n",
-    "RAGAS_API_BASE_URL = \"https://api.dev.app.ragas.io\"\n",
-    "os.environ[\"RAGAS_APP_TOKEN\"] = RAGAS_APP_TOKEN\n",
-    "os.environ[\"RAGAS_API_BASE_URL\"] = RAGAS_API_BASE_URL\n",
-    "\n",
-    "# Get a project from the Ragas API\n",
-    "ragas_app_project = Project.get(\n",
-    "    name=\"Your Project Name\",\n",
-    "    backend=\"ragas_app\"\n",
-    ")\n",
-    "\n",
-    "# Define a test model\n",
-    "class ApiTestModel(BaseModel):\n",
-    "    id: int\n",
-    "    name: str\n",
-    "    description: str\n",
-    "    score: float\n",
-    "\n",
-    "# Create a dataset with ragas_app backend\n",
-    "api_dataset = ragas_app_project.create_dataset(\n",
-    "    model=ApiTestModel,\n",
-    "    name=\"api_test_dataset\",\n",
-    "    backend=\"ragas_app\"\n",
-    ")\n",
-    "\n",
-    "# Add some entries\n",
-    "for i in range(3):\n",
-    "    entry = ApiTestModel(\n",
-    "        id=i,\n",
-    "        name=f\"API Test Item {i}\",\n",
-    "        description=f\"Description for API item {i}\",\n",
-    "        score=i * 1.1\n",
-    "    )\n",
-    "    api_dataset.append(entry)\n",
-    "\n",
-    "# List all datasets in the project\n",
-    "dataset_names = ragas_app_project.list_dataset_names(backend=\"ragas_app\")\n",
-    "print(f\"Datasets in project: {dataset_names}\")\n",
-    "\n",
-    "# Get the dataset by name\n",
-    "retrieved_dataset = ragas_app_project.get_dataset(\n",
-    "    dataset_name=\"api_test_dataset\",\n",
-    "    model=ApiTestModel,\n",
-    "    backend=\"ragas_app\"\n",
-    ")\n",
-    "\n",
-    "# Load entries\n",
-    "retrieved_dataset.load()\n",
-    "print(f\"Retrieved dataset: {retrieved_dataset}\")\n",
-    "\n",
-    "# View as DataFrame\n",
-    "df = retrieved_dataset.to_pandas()\n",
-    "print(\"\\nDataFrame:\")\n",
-    "print(df)\n",
-    "'''"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "id": "2f761688",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def update_dataset_class_for_local_backend():\n",
-    "    \"\"\"Updates the Dataset class to support local backend.\n",
-    "    \n",
-    "    This is called when the module is imported to patch the Dataset class\n",
-    "    with methods that enable local backend support.\n",
-    "    \"\"\"\n",
-    "    from ragas_experimental.dataset import Dataset\n",
-    "    import csv\n",
-    "    import os\n",
-    "    import uuid\n",
-    "    \n",
-    "    # Add backend parameter to Dataset.__init__\n",
-    "    original_init = Dataset.__init__\n",
-    "    \n",
-    "    def new_init(\n",
-    "        self,\n",
-    "        name: str,\n",
-    "        model: t.Type[BaseModel],\n",
-    "        project_id: str,\n",
-    "        dataset_id: str,\n",
-    "        ragas_api_client=None,\n",
-    "        backend: t.Literal[\"ragas_app\", \"local\"] = \"ragas_app\",\n",
-    "        local_root_dir: t.Optional[str] = None,\n",
-    "    ):\n",
-    "        self.backend = backend\n",
-    "        self.local_root_dir = local_root_dir\n",
-    "        \n",
-    "        if backend == \"local\":\n",
-    "            if local_root_dir is None:\n",
-    "                raise ValueError(\"local_root_dir is required for local backend\")\n",
-    "                \n",
-    "            # Set basic properties\n",
-    "            self.name = name\n",
-    "            self.model = model\n",
-    "            self.project_id = project_id\n",
-    "            self.dataset_id = dataset_id\n",
-    "            self._ragas_api_client = None\n",
-    "            self._entries = []\n",
-    "            \n",
-    "            # Setup column mapping\n",
-    "            if not hasattr(self.model, \"__column_mapping__\"):\n",
-    "                self.model.__column_mapping__ = {}\n",
-    "                \n",
-    "            # For local backend, columns map directly to field names\n",
-    "            for field_name in model.__annotations__:\n",
-    "                self.model.__column_mapping__[field_name] = field_name\n",
-    "                \n",
-    "            # Load entries from CSV if it exists\n",
-    "            self._load_from_csv()\n",
-    "        else:\n",
-    "            # Call original init for ragas_app backend\n",
-    "            original_init(self, name, model, project_id, dataset_id, ragas_api_client)\n",
-    "    \n",
-    "    # Add method to load from CSV\n",
-    "    def _load_from_csv(self):\n",
-    "        \"\"\"Load dataset entries from CSV file.\"\"\"\n",
-    "        if self.backend != \"local\":\n",
-    "            return\n",
-    "            \n",
-    "        # Construct CSV path\n",
-    "        project_dir = os.path.join(self.local_root_dir, self.project_id)\n",
-    "        csv_path = os.path.join(project_dir, \"datasets\", f\"{self.name}.csv\")\n",
-    "        \n",
-    "        if not os.path.exists(csv_path):\n",
-    "            return\n",
-    "            \n",
-    "        # Read CSV\n",
-    "        with open(csv_path, 'r', newline='') as f:\n",
-    "            reader = csv.DictReader(f)\n",
-    "            \n",
-    "            # Clear existing entries\n",
-    "            self._entries.clear()\n",
-    "            \n",
-    "            # Process rows\n",
-    "            for row in reader:\n",
-    "                try:\n",
-    "                    # Convert types as needed based on model annotations\n",
-    "                    typed_row = {}\n",
-    "                    for field, value in row.items():\n",
-    "                        if field in self.model.__annotations__:\n",
-    "                            field_type = self.model.__annotations__[field]\n",
-    "                            \n",
-    "                            # Handle basic type conversions\n",
-    "                            if field_type == int:\n",
-    "                                typed_row[field] = int(value) if value else 0\n",
-    "                            elif field_type == float:\n",
-    "                                typed_row[field] = float(value) if value else 0.0\n",
-    "                            elif field_type == bool:\n",
-    "                                typed_row[field] = value.lower() in ('true', 't', 'yes', 'y', '1')\n",
-    "                            else:\n",
-    "                                typed_row[field] = value\n",
-    "                    \n",
-    "                    # Create model instance\n",
-    "                    entry = self.model(**typed_row)\n",
-    "                    \n",
-    "                    # Add row_id for tracking changes\n",
-    "                    entry._row_id = str(uuid.uuid4())\n",
-    "                    \n",
-    "                    self._entries.append(entry)\n",
-    "                except Exception as e:\n",
-    "                    print(f\"Error loading row: {e}\")\n",
-    "    \n",
-    "    # Add method to save to CSV\n",
-    "    def _save_to_csv(self):\n",
-    "        \"\"\"Save all entries to CSV file.\"\"\"\n",
-    "        if self.backend != \"local\":\n",
-    "            return\n",
-    "            \n",
-    "        # Construct CSV path\n",
-    "        project_dir = os.path.join(self.local_root_dir, self.project_id)\n",
-    "        csv_path = os.path.join(project_dir, \"datasets\", f\"{self.name}.csv\")\n",
-    "        \n",
-    "        # Ensure directory exists\n",
-    "        os.makedirs(os.path.dirname(csv_path), exist_ok=True)\n",
-    "        \n",
-    "        # Get field names from model\n",
-    "        field_names = list(self.model.__annotations__.keys())\n",
-    "        \n",
-    "        # Write to CSV\n",
-    "        with open(csv_path, 'w', newline='') as f:\n",
-    "            writer = csv.DictWriter(f, fieldnames=field_names)\n",
-    "            writer.writeheader()\n",
-    "            \n",
-    "            for entry in self._entries:\n",
-    "                # Convert model instance to dict and write row\n",
-    "                writer.writerow(entry.model_dump())\n",
-    "    \n",
-    "    # Patch the original methods to support local backend\n",
-    "    \n",
-    "    # Patch append\n",
-    "    original_append = Dataset.append\n",
-    "    \n",
-    "    def new_append(self, entry):\n",
-    "        if self.backend == \"local\":\n",
-    "            if not isinstance(entry, self.model):\n",
-    "                raise TypeError(f\"Entry must be an instance of {self.model.__name__}\")\n",
-    "                \n",
-    "            # Add row_id for tracking changes\n",
-    "            entry._row_id = str(uuid.uuid4())\n",
-    "            \n",
-    "            # Add to in-memory entries\n",
-    "            self._entries.append(entry)\n",
-    "            \n",
-    "            # Save to CSV\n",
-    "            self._save_to_csv()\n",
-    "        else:\n",
-    "            original_append(self, entry)\n",
-    "    \n",
-    "    # Patch pop\n",
-    "    original_pop = Dataset.pop\n",
-    "    \n",
-    "    def new_pop(self, index=-1):\n",
-    "        if self.backend == \"local\":\n",
-    "            # Remove from in-memory entries\n",
-    "            entry = self._entries.pop(index)\n",
-    "            \n",
-    "            # Save to CSV\n",
-    "            self._save_to_csv()\n",
-    "            \n",
-    "            return entry\n",
-    "        else:\n",
-    "            return original_pop(self, index)\n",
-    "    \n",
-    "    # Patch load\n",
-    "    original_load = Dataset.load\n",
-    "    \n",
-    "    def new_load(self):\n",
-    "        if self.backend == \"local\":\n",
-    "            self._load_from_csv()\n",
-    "        else:\n",
-    "            original_load(self)\n",
-    "    \n",
-    "    # Patch save\n",
-    "    original_save = Dataset.save\n",
-    "    \n",
-    "    def new_save(self, item):\n",
-    "        if self.backend == \"local\":\n",
-    "            if not isinstance(item, self.model):\n",
-    "                raise TypeError(f\"Item must be an instance of {self.model.__name__}\")\n",
-    "                \n",
-    "            # Find the item in our entries\n",
-    "            found = False\n",
-    "            for i, entry in enumerate(self._entries):\n",
-    "                if hasattr(entry, \"_row_id\") and hasattr(item, \"_row_id\") and entry._row_id == item._row_id:\n",
-    "                    # Update the entry\n",
-    "                    self._entries[i] = item\n",
-    "                    found = True\n",
-    "                    break\n",
-    "                    \n",
-    "            if not found:\n",
-    "                # If we didn't find it, add it\n",
-    "                if not hasattr(item, \"_row_id\"):\n",
-    "                    item._row_id = str(uuid.uuid4())\n",
-    "                self._entries.append(item)\n",
-    "                \n",
-    "            # Save to CSV\n",
-    "            self._save_to_csv()\n",
-    "        else:\n",
-    "            original_save(self, item)\n",
-    "    \n",
-    "    # Apply all patches\n",
-    "    Dataset.__init__ = new_init\n",
-    "    Dataset._load_from_csv = _load_from_csv\n",
-    "    Dataset._save_to_csv = _save_to_csv\n",
-    "    Dataset.append = new_append\n",
-    "    Dataset.pop = new_pop\n",
-    "    Dataset.load = new_load\n",
-    "    Dataset.save = new_save\n",
-    "    \n",
-    "    return Dataset\n",
-    "\n",
-    "# Update the Dataset class\n",
-    "updated_dataset_class = update_dataset_class_for_local_backend()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "id": "23a6eabf",
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "TypeError",
-     "evalue": "update_dataset_class_for_local_backend.<locals>.new_init() got an unexpected keyword argument 'datatable_type'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mTypeError\u001b[39m                                 Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[23]\u001b[39m\u001b[32m, line 24\u001b[39m\n\u001b[32m     21\u001b[39m     score: \u001b[38;5;28mfloat\u001b[39m\n\u001b[32m     23\u001b[39m \u001b[38;5;66;03m# Create a dataset with local backend\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m24\u001b[39m local_dataset = \u001b[43mlocal_project\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcreate_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m     25\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mLocalTestModel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m     26\u001b[39m \u001b[43m    \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtest_dataset\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m     27\u001b[39m \u001b[43m    \u001b[49m\u001b[43mbackend\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlocal\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m     28\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     30\u001b[39m \u001b[38;5;66;03m# Add some entries to the dataset\u001b[39;00m\n\u001b[32m     31\u001b[39m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[32m5\u001b[39m):\n",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[7]\u001b[39m\u001b[32m, line 29\u001b[39m, in \u001b[36mcreate_dataset\u001b[39m\u001b[34m(self, model, name, backend)\u001b[39m\n\u001b[32m     27\u001b[39m \u001b[38;5;66;03m# Create dataset using the appropriate backend\u001b[39;00m\n\u001b[32m     28\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m backend == \u001b[33m\"\u001b[39m\u001b[33mlocal\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m---> \u001b[39m\u001b[32m29\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mget_dataset_from_local\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     30\u001b[39m \u001b[38;5;28;01melif\u001b[39;00m backend == \u001b[33m\"\u001b[39m\u001b[33mragas_app\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m     31\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m get_dataset_from_ragas_app(\u001b[38;5;28mself\u001b[39m, name, model)\n",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 20\u001b[39m, in \u001b[36mget_dataset_from_local\u001b[39m\u001b[34m(self, name, model)\u001b[39m\n\u001b[32m     17\u001b[39m dataset_id = create_nano_id()\n\u001b[32m     19\u001b[39m \u001b[38;5;66;03m# Return a new Dataset instance with local backend\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m20\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m     21\u001b[39m \u001b[43m    \u001b[49m\u001b[43mname\u001b[49m\u001b[43m=\u001b[49m\u001b[43mname\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m.\u001b[49m\u001b[34;43m__name__\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m     22\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m     23\u001b[39m \u001b[43m    \u001b[49m\u001b[43mdatatable_type\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mdatasets\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m     24\u001b[39m \u001b[43m    \u001b[49m\u001b[43mproject_id\u001b[49m\u001b[43m=\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mproject_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m     25\u001b[39m \u001b[43m    \u001b[49m\u001b[43mdataset_id\u001b[49m\u001b[43m=\u001b[49m\u001b[43mdataset_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m     26\u001b[39m \u001b[43m    \u001b[49m\u001b[43mbackend\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlocal\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m     27\u001b[39m \u001b[43m    \u001b[49m\u001b[43mlocal_root_dir\u001b[49m\u001b[43m=\u001b[49m\u001b[43mos\u001b[49m\u001b[43m.\u001b[49m\u001b[43mpath\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdirname\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_root_dir\u001b[49m\u001b[43m)\u001b[49m\u001b[43m  \u001b[49m\u001b[38;5;66;43;03m# Root dir for all projects\u001b[39;49;00m\n\u001b[32m     28\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n",
-      "\u001b[31mTypeError\u001b[39m: update_dataset_class_for_local_backend.<locals>.new_init() got an unexpected keyword argument 'datatable_type'"
-     ]
-    }
-   ],
-   "source": [
-    "# Example of using the local backend Dataset operations\n",
-    "import tempfile\n",
-    "import os\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "# Create a temporary directory for demonstration\n",
-    "with tempfile.TemporaryDirectory() as temp_dir:\n",
-    "    # Create a new project with local backend\n",
-    "    local_project = Project.create(\n",
-    "        name=\"test_local_project\",\n",
-    "        description=\"A test project using local backend\",\n",
-    "        backend=\"local\",\n",
-    "        root_dir=temp_dir\n",
-    "    )\n",
-    "    \n",
-    "    # Define a test model\n",
-    "    class LocalTestModel(BaseModel):\n",
-    "        id: int\n",
-    "        name: str\n",
-    "        description: str\n",
-    "        score: float\n",
-    "    \n",
-    "    # Create a dataset with local backend\n",
-    "    local_dataset = local_project.create_dataset(\n",
-    "        model=LocalTestModel,\n",
-    "        name=\"test_dataset\",\n",
-    "        backend=\"local\"\n",
-    "    )\n",
-    "    \n",
-    "    # Add some entries to the dataset\n",
-    "    for i in range(5):\n",
-    "        entry = LocalTestModel(\n",
-    "            id=i,\n",
-    "            name=f\"Test Item {i}\",\n",
-    "            description=f\"Description for item {i}\",\n",
-    "            score=i * 0.1\n",
-    "        )\n",
-    "        local_dataset.append(entry)\n",
-    "    \n",
-    "    # Print the dataset contents\n",
-    "    print(f\"Dataset after adding entries: {local_dataset}\")\n",
-    "    \n",
-    "    # Check the CSV file\n",
-    "    dataset_path = local_project.get_dataset_path(\"test_dataset\")\n",
-    "    print(f\"Dataset file path: {dataset_path}\")\n",
-    "    with open(dataset_path, 'r') as f:\n",
-    "        csv_content = f.read()\n",
-    "    print(f\"CSV content:\\n{csv_content}\")\n",
-    "    \n",
-    "    # Modify an entry\n",
-    "    entry = local_dataset[2]\n",
-    "    entry.name = \"Updated Name\"\n",
-    "    entry.score = 9.9\n",
-    "    local_dataset.save(entry)\n",
-    "    \n",
-    "    # Load the dataset again\n",
-    "    local_dataset.load()\n",
-    "    \n",
-    "    # Print updated entry\n",
-    "    print(f\"Updated entry: {local_dataset[2]}\")\n",
-    "    \n",
-    "    # Convert to pandas DataFrame\n",
-    "    df = local_dataset.to_pandas()\n",
-    "    print(\"\\nDataFrame:\")\n",
-    "    print(df)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/experimental/old_nbs/api/project/datasets.md b/experimental/old_nbs/api/project/datasets.md
deleted file mode 100644
index a9b10c1f1..000000000
--- a/experimental/old_nbs/api/project/datasets.md
+++ /dev/null
@@ -1,838 +0,0 @@
----
-jupyter:
-  jupytext:
-    text_representation:
-      extension: .md
-      format_name: markdown
-      format_version: '1.3'
-      jupytext_version: 1.17.1
-  kernelspec:
-    display_name: .venv
-    language: python
-    name: python3
----
-
-# Dataset Management
-
-> Methods to create and manage datasets within projects
-
-```python
-# | default_exp project.datasets
-```
-
-```python
-# | hide
-from nbdev.showdoc import *
-```
-
-```python
-# | export
-import typing as t
-import os
-import asyncio
-
-from fastcore.utils import patch
-from pydantic import BaseModel
-
-from ragas_experimental.project.core import Project
-from ragas_experimental.typing import SUPPORTED_BACKENDS
-from ragas_experimental.backends.factory import RagasApiClientFactory
-from ragas_experimental.backends.ragas_api_client import RagasApiClient
-import ragas_experimental.typing as rt
-from ragas_experimental.utils import async_to_sync, create_nano_id
-from ragas_experimental.dataset import Dataset
-```
-
-# | export
-import typing as t
-import os
-import asyncio
-import tempfile
-import shutil
-import csv
-from pathlib import Path
-
-from fastcore.utils import patch
-from pydantic import BaseModel
-
-from ragas_experimental.project.core import Project
-from ragas_experimental.typing import SUPPORTED_BACKENDS
-from ragas_experimental.backends.factory import RagasApiClientFactory
-from ragas_experimental.backends.ragas_api_client import RagasApiClient
-import ragas_experimental.typing as rt
-from ragas_experimental.utils import async_to_sync, create_nano_id
-from ragas_experimental.dataset import Dataset
-
-# Helper function for tests
-def get_test_directory():
-    """Create a test directory that will be cleaned up on process exit.
-    
-    Returns:
-        str: Path to test directory
-    """
-    # Create a directory in the system temp directory
-    test_dir = os.path.join(tempfile.gettempdir(), f"ragas_test_{create_nano_id()}")
-    os.makedirs(test_dir, exist_ok=True)
-    
-    return test_dir
-
-```python
-#| export
-async def create_dataset_columns(project_id, dataset_id, columns, create_dataset_column_func):
-    tasks = []
-    for column in columns:
-        tasks.append(create_dataset_column_func(
-            project_id=project_id,
-            dataset_id=dataset_id,
-            id=create_nano_id(),
-            name=column["name"],
-            type=column["type"],
-            settings=column["settings"],
-        ))
-    return await asyncio.gather(*tasks)
-```
-
-```python
-# | export
-def get_dataset_from_ragas_app(
-    self: Project, 
-    name: str, 
-    model: t.Type[BaseModel]
-) -> Dataset:
-    """Create a dataset in the Ragas App backend."""
-    # create the dataset
-    sync_version = async_to_sync(self._ragas_api_client.create_dataset)
-    dataset_info = sync_version(
-        project_id=self.project_id,
-        name=name if name is not None else model.__name__,
-    )
-
-    # create the columns for the dataset
-    column_types = rt.ModelConverter.model_to_columns(model)
-    sync_version = async_to_sync(create_dataset_columns)
-    sync_version(
-        project_id=self.project_id,
-        dataset_id=dataset_info["id"],
-        columns=column_types,
-        create_dataset_column_func=self._ragas_api_client.create_dataset_column,
-    )
-        
-    # Return a new Dataset instance
-    return Dataset(
-        name=name if name is not None else model.__name__,
-        model=model,
-        project_id=self.project_id,
-        dataset_id=dataset_info["id"],
-        ragas_api_client=self._ragas_api_client,
-        backend="ragas_app"
-    )
-```
-
-```python
-# | export
-def get_dataset_from_local(
-    self: Project,
-    name: str,
-    model: t.Type[BaseModel]
-) -> Dataset:
-    """Create a dataset in the local filesystem backend.
-    
-    Args:
-        name: Name of the dataset
-        model: Pydantic model defining the structure
-        
-    Returns:
-        Dataset: A new dataset configured to use the local backend
-    """
-    # Use a UUID as the dataset ID
-    dataset_id = create_nano_id()
-    
-    # Return a new Dataset instance with local backend
-    return Dataset(
-        name=name if name is not None else model.__name__,
-        model=model,
-        project_id=self.project_id,
-        dataset_id=dataset_id,
-        backend="local",
-        local_root_dir=os.path.dirname(self._root_dir)  # Root dir for all projects
-    )
-```
-
-```python
-# | export
-@patch
-def create_dataset(
-    self: Project, 
-    model: t.Type[BaseModel], 
-    name: t.Optional[str] = None,
-    backend: t.Optional[SUPPORTED_BACKENDS] = None
-) -> Dataset:
-    """Create a new dataset.
-
-    Args:
-        model: Model class defining the dataset structure
-        name: Name of the dataset (defaults to model name if not provided)
-        backend: The backend to use (defaults to project's backend if not specified)
-
-    Returns:
-        Dataset: A new dataset object for managing entries
-    """
-    # If name is not provided, use the model name
-    if name is None:
-        name = model.__name__
-        
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    # Create dataset using the appropriate backend
-    if backend == "local":
-        return get_dataset_from_local(self, name, model)
-    elif backend == "ragas_app":
-        return get_dataset_from_ragas_app(self, name, model)
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-```
-
-```python
-# | export
-@patch
-def get_dataset_by_id(
-    self: Project, 
-    dataset_id: str, 
-    model: t.Type[BaseModel],
-    backend: t.Optional[SUPPORTED_BACKENDS] = None
-) -> Dataset:
-    """Get an existing dataset by ID.
-    
-    Args:
-        dataset_id: The ID of the dataset to retrieve
-        model: The model class to use for the dataset entries
-        backend: The backend to use (defaults to project's backend)
-        
-    Returns:
-        Dataset: The retrieved dataset
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-        
-    if backend == "ragas_app":
-        # Search for database with given ID
-        sync_version = async_to_sync(self._ragas_api_client.get_dataset)
-        dataset_info = sync_version(
-            project_id=self.project_id,
-            dataset_id=dataset_id
-        )
-
-        # For now, return Dataset without model type
-        return Dataset(
-            name=dataset_info["name"],
-            model=model,
-            project_id=self.project_id,
-            dataset_id=dataset_id,
-            ragas_api_client=self._ragas_api_client,
-            backend="ragas_app"
-        )
-    elif backend == "local":
-        # For local backend, this is not a typical operation since we use names
-        # We could maintain a mapping of IDs to names, but for now just raise an error
-        raise NotImplementedError(
-            "get_dataset_by_id is not implemented for local backend. "
-            "Use get_dataset with the dataset name instead."
-        )
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-```
-
-```python
-# | export
-@patch
-def get_dataset(
-    self: Project, 
-    dataset_name: str, 
-    model: t.Type[BaseModel],
-    backend: t.Optional[SUPPORTED_BACKENDS] = None
-) -> Dataset:
-    """Get an existing dataset by name.
-    
-    Args:
-        dataset_name: The name of the dataset to retrieve
-        model: The model class to use for the dataset entries
-        backend: The backend to use (defaults to project's backend if not specified)
-        
-    Returns:
-        Dataset: The retrieved dataset
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-        
-    if backend == "ragas_app":
-        # Search for dataset with given name
-        sync_version = async_to_sync(self._ragas_api_client.get_dataset_by_name)
-        dataset_info = sync_version(
-            project_id=self.project_id,
-            dataset_name=dataset_name
-        )
-
-        # Return Dataset instance
-        return Dataset(
-            name=dataset_info["name"],
-            model=model,
-            project_id=self.project_id,
-            dataset_id=dataset_info["id"],
-            ragas_api_client=self._ragas_api_client,
-            backend="ragas_app"
-        )
-    elif backend == "local":
-        # Check if the dataset file exists
-        dataset_path = self.get_dataset_path(dataset_name)
-        if not os.path.exists(dataset_path):
-            raise ValueError(f"Dataset '{dataset_name}' does not exist")
-            
-        # Create dataset instance with a random ID
-        dataset_id = create_nano_id()
-        
-        # Return Dataset instance
-        return Dataset(
-            name=dataset_name,
-            model=model,
-            project_id=self.project_id,
-            dataset_id=dataset_id,
-            backend="local",
-            local_root_dir=os.path.dirname(self._root_dir)  # Root dir for all projects
-        )
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-```
-
-```python
-# | export
-@patch
-def list_dataset_names(
-    self: Project,
-    backend: t.Optional[SUPPORTED_BACKENDS] = None
-) -> t.List[str]:
-    """List all datasets in the project.
-    
-    Args:
-        backend: The backend to use (defaults to project's backend)
-        
-    Returns:
-        List[str]: Names of all datasets in the project
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-        
-    if backend == "ragas_app":
-        # Get all datasets from API
-        sync_version = async_to_sync(self._ragas_api_client.list_datasets)
-        datasets = sync_version(project_id=self.project_id)
-        return [dataset["name"] for dataset in datasets]
-    elif backend == "local":
-        # Get all CSV files in the datasets directory
-        datasets_dir = os.path.join(self._root_dir, "datasets")
-        if not os.path.exists(datasets_dir):
-            return []
-            
-        return [
-            os.path.splitext(f)[0] for f in os.listdir(datasets_dir)
-            if f.endswith('.csv')
-        ]
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-```
-
-```python
-# Example of using the local backend
-import tempfile
-import os
-from pydantic import BaseModel
-
-# Create a temporary directory for demonstration
-with tempfile.TemporaryDirectory() as temp_dir:
-    # Create a new project with local backend
-    local_project = Project.create(
-        name="test_local_project",
-        description="A test project using local backend",
-        backend="local",
-        root_dir=temp_dir
-    )
-    
-    # Define a test model
-    class LocalTestModel(BaseModel):
-        id: int
-        name: str
-        description: str
-        score: float
-    
-    # Create a dataset with local backend
-    local_dataset = local_project.create_dataset(
-        model=LocalTestModel,
-        name="test_dataset",
-        backend="local"
-    )
-    
-    # Check that the dataset file was created
-    dataset_path = local_project.get_dataset_path("test_dataset")
-    print(f"Dataset file exists: {os.path.exists(dataset_path)}")
-    
-    # List datasets
-    datasets = local_project.list_dataset_names()
-    print(f"Datasets in project: {datasets}")
-    
-    # Get the dataset
-    retrieved_dataset = local_project.get_dataset(
-        dataset_name="test_dataset",
-        model=LocalTestModel,
-        backend="local"
-    )
-    
-    print(f"Retrieved dataset: {retrieved_dataset}")
-```
-
-```python
-# Define a test model for demonstration
-class TestModel(BaseModel):
-    id: int
-    name: str
-    description: str
-    tags: t.Literal["tag1", "tag2", "tag3"]
-    tags_color_coded: t.Annotated[t.Literal["red", "green", "blue"], rt.Select(colors=["red", "green", "blue"])]
-    url: t.Annotated[str, rt.Url()] = "https://www.google.com"
-```
-
-```python
-# Example of using the local backend with Project integration
-import tempfile
-import os
-from pydantic import BaseModel
-
-# Create a temporary directory for demonstration
-with tempfile.TemporaryDirectory() as temp_dir:
-    # Create a new project with local backend
-    local_project = Project.create(
-        name="test_local_project",
-        description="A test project using local backend",
-        backend="local",
-        root_dir=temp_dir
-    )
-    
-    # Define a test model
-    class LocalTestModel(BaseModel):
-        id: int
-        name: str
-        description: str
-        score: float
-    
-    # Create a dataset with local backend
-    local_dataset = local_project.create_dataset(
-        model=LocalTestModel,
-        name="test_dataset"
-    )
-    
-    # Add some entries
-    for i in range(3):
-        entry = LocalTestModel(
-            id=i,
-            name=f"Test Item {i}",
-            description=f"Description for item {i}",
-            score=i * 0.5
-        )
-        local_dataset.append(entry)
-    
-    # Check the dataset
-    print(f"Dataset after adding entries: {local_dataset}")
-    
-    # Get the dataset path
-    dataset_path = local_project.get_dataset_path("test_dataset")
-    print(f"Dataset file path: {dataset_path}")
-    
-    # Check that the file exists
-    print(f"Dataset file exists: {os.path.exists(dataset_path)}")
-    
-    # Read CSV content
-    with open(dataset_path, 'r') as f:
-        csv_content = f.read()
-    print(f"CSV content:\n{csv_content}")
-    
-    # List datasets in the project
-    dataset_names = local_project.list_dataset_names()
-    print(f"Datasets in project: {dataset_names}")
-    
-    # Get the dataset by name
-    retrieved_dataset = local_project.get_dataset(
-        dataset_name="test_dataset",
-        model=LocalTestModel
-    )
-    
-    # Load entries
-    retrieved_dataset.load()
-    print(f"Retrieved dataset: {retrieved_dataset}")
-    
-    # Modify an entry
-    entry = retrieved_dataset[1]
-    entry.name = "Updated Name"
-    entry.score = 9.9
-    retrieved_dataset.save(entry)
-    
-    # Load again to verify changes
-    retrieved_dataset.load()
-    print(f"Updated entry: {retrieved_dataset[1]}")
-    
-    # Convert to DataFrame
-    df = retrieved_dataset.to_pandas()
-    print("\nDataFrame:")
-    print(df)
-```
-
-```python
-# Example of using ragas_app backend (commented out since it requires API access)
-'''
-import os
-from pydantic import BaseModel
-
-# Set environment variables for API access
-RAGAS_APP_TOKEN = "your-api-key"
-RAGAS_API_BASE_URL = "https://api.dev.app.ragas.io"
-os.environ["RAGAS_APP_TOKEN"] = RAGAS_APP_TOKEN
-os.environ["RAGAS_API_BASE_URL"] = RAGAS_API_BASE_URL
-
-# Get a project from the Ragas API
-ragas_app_project = Project.get(
-    name="Your Project Name",
-    backend="ragas_app"
-)
-
-# Define a test model
-class ApiTestModel(BaseModel):
-    id: int
-    name: str
-    description: str
-    score: float
-
-# Create a dataset with ragas_app backend
-api_dataset = ragas_app_project.create_dataset(
-    model=ApiTestModel,
-    name="api_test_dataset",
-    backend="ragas_app"
-)
-
-# Add some entries
-for i in range(3):
-    entry = ApiTestModel(
-        id=i,
-        name=f"API Test Item {i}",
-        description=f"Description for API item {i}",
-        score=i * 1.1
-    )
-    api_dataset.append(entry)
-
-# List all datasets in the project
-dataset_names = ragas_app_project.list_dataset_names(backend="ragas_app")
-print(f"Datasets in project: {dataset_names}")
-
-# Get the dataset by name
-retrieved_dataset = ragas_app_project.get_dataset(
-    dataset_name="api_test_dataset",
-    model=ApiTestModel,
-    backend="ragas_app"
-)
-
-# Load entries
-retrieved_dataset.load()
-print(f"Retrieved dataset: {retrieved_dataset}")
-
-# View as DataFrame
-df = retrieved_dataset.to_pandas()
-print("\nDataFrame:")
-print(df)
-'''
-```
-
-```python
-# | export
-def update_dataset_class_for_local_backend():
-    """Updates the Dataset class to support local backend.
-    
-    This is called when the module is imported to patch the Dataset class
-    with methods that enable local backend support.
-    """
-    from ragas_experimental.dataset import Dataset
-    import csv
-    import os
-    import uuid
-    
-    # Add backend parameter to Dataset.__init__
-    original_init = Dataset.__init__
-    
-    def new_init(
-        self,
-        name: str,
-        model: t.Type[BaseModel],
-        project_id: str,
-        dataset_id: str,
-        ragas_api_client=None,
-        backend: t.Literal["ragas_app", "local"] = "ragas_app",
-        local_root_dir: t.Optional[str] = None,
-    ):
-        self.backend = backend
-        self.local_root_dir = local_root_dir
-        
-        if backend == "local":
-            if local_root_dir is None:
-                raise ValueError("local_root_dir is required for local backend")
-                
-            # Set basic properties
-            self.name = name
-            self.model = model
-            self.project_id = project_id
-            self.dataset_id = dataset_id
-            self._ragas_api_client = None
-            self._entries = []
-            
-            # Setup column mapping
-            if not hasattr(self.model, "__column_mapping__"):
-                self.model.__column_mapping__ = {}
-                
-            # For local backend, columns map directly to field names
-            for field_name in model.__annotations__:
-                self.model.__column_mapping__[field_name] = field_name
-                
-            # Load entries from CSV if it exists
-            self._load_from_csv()
-        else:
-            # Call original init for ragas_app backend
-            original_init(self, name, model, project_id, dataset_id, ragas_api_client)
-    
-    # Add method to load from CSV
-    def _load_from_csv(self):
-        """Load dataset entries from CSV file."""
-        if self.backend != "local":
-            return
-            
-        # Construct CSV path
-        project_dir = os.path.join(self.local_root_dir, self.project_id)
-        csv_path = os.path.join(project_dir, "datasets", f"{self.name}.csv")
-        
-        if not os.path.exists(csv_path):
-            return
-            
-        # Read CSV
-        with open(csv_path, 'r', newline='') as f:
-            reader = csv.DictReader(f)
-            
-            # Clear existing entries
-            self._entries.clear()
-            
-            # Process rows
-            for row in reader:
-                try:
-                    # Convert types as needed based on model annotations
-                    typed_row = {}
-                    for field, value in row.items():
-                        if field in self.model.__annotations__:
-                            field_type = self.model.__annotations__[field]
-                            
-                            # Handle basic type conversions
-                            if field_type == int:
-                                typed_row[field] = int(value) if value else 0
-                            elif field_type == float:
-                                typed_row[field] = float(value) if value else 0.0
-                            elif field_type == bool:
-                                typed_row[field] = value.lower() in ('true', 't', 'yes', 'y', '1')
-                            else:
-                                typed_row[field] = value
-                    
-                    # Create model instance
-                    entry = self.model(**typed_row)
-                    
-                    # Add row_id for tracking changes
-                    entry._row_id = str(uuid.uuid4())
-                    
-                    self._entries.append(entry)
-                except Exception as e:
-                    print(f"Error loading row: {e}")
-    
-    # Add method to save to CSV
-    def _save_to_csv(self):
-        """Save all entries to CSV file."""
-        if self.backend != "local":
-            return
-            
-        # Construct CSV path
-        project_dir = os.path.join(self.local_root_dir, self.project_id)
-        csv_path = os.path.join(project_dir, "datasets", f"{self.name}.csv")
-        
-        # Ensure directory exists
-        os.makedirs(os.path.dirname(csv_path), exist_ok=True)
-        
-        # Get field names from model
-        field_names = list(self.model.__annotations__.keys())
-        
-        # Write to CSV
-        with open(csv_path, 'w', newline='') as f:
-            writer = csv.DictWriter(f, fieldnames=field_names)
-            writer.writeheader()
-            
-            for entry in self._entries:
-                # Convert model instance to dict and write row
-                writer.writerow(entry.model_dump())
-    
-    # Patch the original methods to support local backend
-    
-    # Patch append
-    original_append = Dataset.append
-    
-    def new_append(self, entry):
-        if self.backend == "local":
-            if not isinstance(entry, self.model):
-                raise TypeError(f"Entry must be an instance of {self.model.__name__}")
-                
-            # Add row_id for tracking changes
-            entry._row_id = str(uuid.uuid4())
-            
-            # Add to in-memory entries
-            self._entries.append(entry)
-            
-            # Save to CSV
-            self._save_to_csv()
-        else:
-            original_append(self, entry)
-    
-    # Patch pop
-    original_pop = Dataset.pop
-    
-    def new_pop(self, index=-1):
-        if self.backend == "local":
-            # Remove from in-memory entries
-            entry = self._entries.pop(index)
-            
-            # Save to CSV
-            self._save_to_csv()
-            
-            return entry
-        else:
-            return original_pop(self, index)
-    
-    # Patch load
-    original_load = Dataset.load
-    
-    def new_load(self):
-        if self.backend == "local":
-            self._load_from_csv()
-        else:
-            original_load(self)
-    
-    # Patch save
-    original_save = Dataset.save
-    
-    def new_save(self, item):
-        if self.backend == "local":
-            if not isinstance(item, self.model):
-                raise TypeError(f"Item must be an instance of {self.model.__name__}")
-                
-            # Find the item in our entries
-            found = False
-            for i, entry in enumerate(self._entries):
-                if hasattr(entry, "_row_id") and hasattr(item, "_row_id") and entry._row_id == item._row_id:
-                    # Update the entry
-                    self._entries[i] = item
-                    found = True
-                    break
-                    
-            if not found:
-                # If we didn't find it, add it
-                if not hasattr(item, "_row_id"):
-                    item._row_id = str(uuid.uuid4())
-                self._entries.append(item)
-                
-            # Save to CSV
-            self._save_to_csv()
-        else:
-            original_save(self, item)
-    
-    # Apply all patches
-    Dataset.__init__ = new_init
-    Dataset._load_from_csv = _load_from_csv
-    Dataset._save_to_csv = _save_to_csv
-    Dataset.append = new_append
-    Dataset.pop = new_pop
-    Dataset.load = new_load
-    Dataset.save = new_save
-    
-    return Dataset
-
-# Update the Dataset class
-updated_dataset_class = update_dataset_class_for_local_backend()
-```
-
-```python
-# Example of using the local backend Dataset operations
-import tempfile
-import os
-from pydantic import BaseModel
-
-# Create a temporary directory for demonstration
-with tempfile.TemporaryDirectory() as temp_dir:
-    # Create a new project with local backend
-    local_project = Project.create(
-        name="test_local_project",
-        description="A test project using local backend",
-        backend="local",
-        root_dir=temp_dir
-    )
-    
-    # Define a test model
-    class LocalTestModel(BaseModel):
-        id: int
-        name: str
-        description: str
-        score: float
-    
-    # Create a dataset with local backend
-    local_dataset = local_project.create_dataset(
-        model=LocalTestModel,
-        name="test_dataset",
-        backend="local"
-    )
-    
-    # Add some entries to the dataset
-    for i in range(5):
-        entry = LocalTestModel(
-            id=i,
-            name=f"Test Item {i}",
-            description=f"Description for item {i}",
-            score=i * 0.1
-        )
-        local_dataset.append(entry)
-    
-    # Print the dataset contents
-    print(f"Dataset after adding entries: {local_dataset}")
-    
-    # Check the CSV file
-    dataset_path = local_project.get_dataset_path("test_dataset")
-    print(f"Dataset file path: {dataset_path}")
-    with open(dataset_path, 'r') as f:
-        csv_content = f.read()
-    print(f"CSV content:\n{csv_content}")
-    
-    # Modify an entry
-    entry = local_dataset[2]
-    entry.name = "Updated Name"
-    entry.score = 9.9
-    local_dataset.save(entry)
-    
-    # Load the dataset again
-    local_dataset.load()
-    
-    # Print updated entry
-    print(f"Updated entry: {local_dataset[2]}")
-    
-    # Convert to pandas DataFrame
-    df = local_dataset.to_pandas()
-    print("\nDataFrame:")
-    print(df)
-```
diff --git a/experimental/old_nbs/api/project/experiments.ipynb b/experimental/old_nbs/api/project/experiments.ipynb
deleted file mode 100644
index 268485b14..000000000
--- a/experimental/old_nbs/api/project/experiments.ipynb
+++ /dev/null
@@ -1,2281 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Experiments\n",
-    "\n",
-    "> How to run experiments"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp project.experiments"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jjmachan/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "# | export\n",
-    "from functools import wraps\n",
-    "import asyncio\n",
-    "import typing as t\n",
-    "import os\n",
-    "\n",
-    "from fastcore.utils import patch\n",
-    "from tqdm import tqdm\n",
-    "\n",
-    "from ragas_experimental.project.core import Project\n",
-    "from ragas_experimental.model.pydantic_model import ExtendedPydanticBaseModel as BaseModel\n",
-    "from ragas_experimental.utils import async_to_sync, create_nano_id\n",
-    "from ragas_experimental.dataset import Dataset, BaseModelType\n",
-    "from ragas_experimental.experiment import Experiment\n",
-    "import ragas_experimental.typing as rt"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Basics"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "# Add this helper function similar to create_dataset_columns in core.ipynb\n",
-    "async def create_experiment_columns(project_id, experiment_id, columns, create_experiment_column_func):\n",
-    "    tasks = []\n",
-    "    for column in columns:\n",
-    "        tasks.append(create_experiment_column_func(\n",
-    "            project_id=project_id,\n",
-    "            experiment_id=experiment_id,\n",
-    "            id=create_nano_id(),\n",
-    "            name=column[\"name\"],\n",
-    "            type=column[\"type\"],\n",
-    "            settings=column[\"settings\"]\n",
-    "        ))\n",
-    "    return await asyncio.gather(*tasks)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "def get_experiment_from_local(\n",
-    "    self: Project,\n",
-    "    name: str,\n",
-    "    model: t.Type[BaseModel]\n",
-    ") -> Experiment:\n",
-    "    \"\"\"Create an experiment in the local filesystem backend.\n",
-    "    \n",
-    "    Args:\n",
-    "        name: Name of the experiment\n",
-    "        model: Model class defining the experiment structure\n",
-    "        \n",
-    "    Returns:\n",
-    "        Experiment: A new experiment configured to use the local backend\n",
-    "    \"\"\"\n",
-    "    # Use a UUID as the experiment ID\n",
-    "    experiment_id = create_nano_id()\n",
-    "\n",
-    "    # Return a new Experiment instance with local backend\n",
-    "    return Experiment(\n",
-    "        name=name,\n",
-    "        model=model,\n",
-    "        project_id=self.project_id,\n",
-    "        experiment_id=experiment_id,\n",
-    "        backend=\"local\",\n",
-    "        local_root_dir=os.path.dirname(self._root_dir)  # Root dir for all projects\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "def get_experiment_from_ragas_app(\n",
-    "    self: Project, \n",
-    "    name: str, \n",
-    "    model: t.Type[BaseModel]\n",
-    ") -> Experiment:\n",
-    "    \"\"\"Create an experiment in the Ragas App backend.\n",
-    "    \n",
-    "    Args:\n",
-    "        name: Name of the experiment\n",
-    "        model: Model class defining the experiment structure\n",
-    "        \n",
-    "    Returns:\n",
-    "        Experiment: A new experiment configured to use the ragas_app backend\n",
-    "    \"\"\"\n",
-    "    # Create the experiment in the API\n",
-    "    sync_version = async_to_sync(self._ragas_api_client.create_experiment)\n",
-    "    experiment_info = sync_version(\n",
-    "        project_id=self.project_id,\n",
-    "        name=name,\n",
-    "    )\n",
-    "\n",
-    "    # Create the columns for the experiment\n",
-    "    column_types = rt.ModelConverter.model_to_columns(model)\n",
-    "    sync_version = async_to_sync(create_experiment_columns)\n",
-    "    sync_version(\n",
-    "        project_id=self.project_id,\n",
-    "        experiment_id=experiment_info[\"id\"],\n",
-    "        columns=column_types,\n",
-    "        create_experiment_column_func=self._ragas_api_client.create_experiment_column,\n",
-    "    )\n",
-    "    \n",
-    "    # Return a new Experiment instance with ragas_app backend\n",
-    "    return Experiment(\n",
-    "        name=name,\n",
-    "        model=model,\n",
-    "        project_id=self.project_id,\n",
-    "        experiment_id=experiment_info[\"id\"],\n",
-    "        ragas_api_client=self._ragas_api_client,\n",
-    "        backend=\"ragas_app\"\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental.utils import get_test_directory"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def create_experiment(\n",
-    "    self: Project, \n",
-    "    name: str, \n",
-    "    model: t.Type[BaseModel],\n",
-    "    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None\n",
-    ") -> Experiment:\n",
-    "    \"\"\"Create a new experiment.\n",
-    "\n",
-    "    Args:\n",
-    "        name: Name of the experiment\n",
-    "        model: Model class defining the experiment structure\n",
-    "        backend: The backend to use (defaults to project's backend if not specified)\n",
-    "\n",
-    "    Returns:\n",
-    "        Experiment: An experiment object for managing results\n",
-    "    \"\"\"\n",
-    "    # If backend is not specified, use the project's backend\n",
-    "    if backend is None:\n",
-    "        backend = self.backend\n",
-    "\n",
-    "    # Create experiment using the appropriate backend\n",
-    "    if backend == \"local\":\n",
-    "        return get_experiment_from_local(self, name, model)\n",
-    "    elif backend == \"ragas_app\":\n",
-    "        return get_experiment_from_ragas_app(self, name, model)\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def get_experiment_path(self: Project, experiment_name: str) -> str:\n",
-    "    \"\"\"Get the filesystem path for an experiment.\n",
-    "    \n",
-    "    Args:\n",
-    "        experiment_name: The name of the experiment\n",
-    "        \n",
-    "    Returns:\n",
-    "        str: The absolute path to the experiment CSV file\n",
-    "    \"\"\"\n",
-    "    # Create path relative to project root\n",
-    "    return os.path.join(self._root_dir, \"experiments\", f\"{experiment_name}.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class TestModel(BaseModel):\n",
-    "    name: str\n",
-    "    description: str\n",
-    "    price: float\n",
-    "    url: t.Annotated[str, rt.Url()] = \"https://www.google.com\"\n",
-    "    tags: t.Annotated[t.Literal[\"test\", \"test2\"], rt.Select(colors=[\"red\", \"blue\"])] = \"test\"\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "local_root_dir = get_test_directory()\n",
-    "p = Project(project_id=\"test\", root_dir=local_root_dir)\n",
-    "exp = p.create_experiment(name=\"test experiment\", model=TestModel)\n",
-    "\n",
-    "assert os.path.exists(p.get_experiment_path(exp.name))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def get_experiment_by_id(\n",
-    "    self: Project, \n",
-    "    experiment_id: str, \n",
-    "    model: t.Type[BaseModel],\n",
-    "    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None\n",
-    ") -> Experiment:\n",
-    "    \"\"\"Get an existing experiment by ID.\n",
-    "    \n",
-    "    Args:\n",
-    "        experiment_id: The ID of the experiment to retrieve\n",
-    "        model: The model class to use for the experiment results\n",
-    "        backend: The backend to use (defaults to project's backend)\n",
-    "        \n",
-    "    Returns:\n",
-    "        Experiment: The retrieved experiment\n",
-    "    \"\"\"\n",
-    "    # If backend is not specified, use the project's backend\n",
-    "    if backend is None:\n",
-    "        backend = self.backend\n",
-    "        \n",
-    "    if backend == \"ragas_app\":\n",
-    "        # Get experiment info from API\n",
-    "        sync_version = async_to_sync(self._ragas_api_client.get_experiment)\n",
-    "        experiment_info = sync_version(\n",
-    "            project_id=self.project_id,\n",
-    "            experiment_id=experiment_id\n",
-    "        )\n",
-    "\n",
-    "        # Return Experiment instance with ragas_app backend\n",
-    "        return Experiment(\n",
-    "            name=experiment_info[\"name\"],\n",
-    "            model=model,\n",
-    "            project_id=self.project_id,\n",
-    "            experiment_id=experiment_id,\n",
-    "            ragas_api_client=self._ragas_api_client,\n",
-    "            backend=\"ragas_app\"\n",
-    "        )\n",
-    "    elif backend == \"local\":\n",
-    "        # For local backend, this is not a typical operation since we use names\n",
-    "        # We could maintain a mapping of IDs to names, but for now just raise an error\n",
-    "        raise NotImplementedError(\n",
-    "            \"get_experiment_by_id is not implemented for local backend. \"\n",
-    "            \"Use get_experiment with the experiment name instead.\"\n",
-    "        )\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def list_experiment_names(\n",
-    "    self: Project,\n",
-    "    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None\n",
-    ") -> t.List[str]:\n",
-    "    \"\"\"List all experiments in the project.\n",
-    "    \n",
-    "    Args:\n",
-    "        backend: The backend to use (defaults to project's backend)\n",
-    "        \n",
-    "    Returns:\n",
-    "        List[str]: Names of all experiments in the project\n",
-    "    \"\"\"\n",
-    "    # If backend is not specified, use the project's backend\n",
-    "    if backend is None:\n",
-    "        backend = self.backend\n",
-    "        \n",
-    "    if backend == \"ragas_app\":\n",
-    "        # Get all experiments from API\n",
-    "        sync_version = async_to_sync(self._ragas_api_client.list_experiments)\n",
-    "        experiments = sync_version(project_id=self.project_id)\n",
-    "        return [experiment[\"name\"] for experiment in experiments]\n",
-    "    elif backend == \"local\":\n",
-    "        # Get all CSV files in the experiments directory\n",
-    "        experiments_dir = os.path.join(self._root_dir, \"experiments\")\n",
-    "        if not os.path.exists(experiments_dir):\n",
-    "            return []\n",
-    "            \n",
-    "        return [\n",
-    "            os.path.splitext(f)[0] for f in os.listdir(experiments_dir)\n",
-    "            if f.endswith('.csv')\n",
-    "        ]\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 16,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from fastcore.test import *"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_eq(p.list_experiment_names(), [\"test experiment\"])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def get_experiment(\n",
-    "    self: Project, \n",
-    "    experiment_name: str, \n",
-    "    model: t.Type[BaseModel],\n",
-    "    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None\n",
-    ") -> Experiment:\n",
-    "    \"\"\"Get an existing experiment by name.\n",
-    "    \n",
-    "    Args:\n",
-    "        experiment_name: The name of the experiment to retrieve\n",
-    "        model: The model class to use for the experiment results\n",
-    "        backend: The backend to use (defaults to project's backend if not specified)\n",
-    "        \n",
-    "    Returns:\n",
-    "        Experiment: The retrieved experiment\n",
-    "    \"\"\"\n",
-    "    # If backend is not specified, use the project's backend\n",
-    "    if backend is None:\n",
-    "        backend = self.backend\n",
-    "        \n",
-    "    if backend == \"ragas_app\":\n",
-    "        # Search for experiment with given name\n",
-    "        sync_version = async_to_sync(self._ragas_api_client.get_experiment_by_name)\n",
-    "        experiment_info = sync_version(\n",
-    "            project_id=self.project_id,\n",
-    "            experiment_name=experiment_name\n",
-    "        )\n",
-    "\n",
-    "        # Return Experiment instance with ragas_app backend\n",
-    "        return Experiment(\n",
-    "            name=experiment_info[\"name\"],\n",
-    "            model=model,\n",
-    "            project_id=self.project_id,\n",
-    "            experiment_id=experiment_info[\"id\"],\n",
-    "            ragas_api_client=self._ragas_api_client,\n",
-    "            backend=\"ragas_app\"\n",
-    "        )\n",
-    "    elif backend == \"local\":\n",
-    "        # Check if the experiment file exists\n",
-    "        experiment_path = self.get_experiment_path(experiment_name)\n",
-    "        if not os.path.exists(experiment_path):\n",
-    "            raise ValueError(f\"Experiment '{experiment_name}' does not exist\")\n",
-    "            \n",
-    "        # Create experiment instance with a random ID\n",
-    "        experiment_id = create_nano_id()\n",
-    "        \n",
-    "        # Return Experiment instance with local backend\n",
-    "        return Experiment(\n",
-    "            name=experiment_name,\n",
-    "            model=model,\n",
-    "            project_id=self.project_id,\n",
-    "            experiment_id=experiment_id,\n",
-    "            backend=\"local\",\n",
-    "            local_root_dir=os.path.dirname(self._root_dir)  # Root dir for all projects\n",
-    "        )\n",
-    "    else:\n",
-    "        raise ValueError(f\"Unsupported backend: {backend}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_eq(p.get_experiment(\"test experiment\", TestModel), exp)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Git Versioning for Experiments"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "import git\n",
-    "from pathlib import Path"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "def find_git_root(\n",
-    "        start_path: t.Union[str, Path, None] = None # starting path to search from\n",
-    "    ) -> Path:\n",
-    "    \"\"\"Find the root directory of a git repository by traversing up from the start path.\"\"\"\n",
-    "    # Start from the current directory if no path is provided\n",
-    "    if start_path is None:\n",
-    "        start_path = Path.cwd()\n",
-    "    else:\n",
-    "        start_path = Path(start_path).resolve()\n",
-    "    \n",
-    "    # Check if the current directory is a git repository\n",
-    "    current_path = start_path\n",
-    "    while current_path != current_path.parent:  # Stop at filesystem root\n",
-    "        if (current_path / '.git').exists() and (current_path / '.git').is_dir():\n",
-    "            return current_path\n",
-    "        \n",
-    "        # Move up to the parent directory\n",
-    "        current_path = current_path.parent\n",
-    "    \n",
-    "    # Final check for the root directory\n",
-    "    if (current_path / '.git').exists() and (current_path / '.git').is_dir():\n",
-    "        return current_path\n",
-    "    \n",
-    "    # No git repository found\n",
-    "    raise ValueError(f\"No git repository found in or above {start_path}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Path('/Users/jjmachan/workspace/eglabs/ragas')"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "find_git_root()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<git.repo.base.Repo '/Users/jjmachan/workspace/eglabs/ragas/.git'>"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "git.Repo(find_git_root())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "\n",
-    "def version_experiment(\n",
-    "    experiment_name: str,\n",
-    "    commit_message: t.Optional[str] = None,\n",
-    "    repo_path: t.Union[str, Path, None] = None,\n",
-    "    create_branch: bool = True,\n",
-    "    stage_all: bool = False,\n",
-    ") -> str:\n",
-    "    \"Version control the current state of the codebase for an experiment.\"\n",
-    "    # Default to current directory if no repo path is provided\n",
-    "    if repo_path is None:\n",
-    "        repo_path = find_git_root()\n",
-    "    \n",
-    "    # Initialize git repo object\n",
-    "    repo = git.Repo(repo_path)\n",
-    "\n",
-    "    # check if there are any changes to the repo\n",
-    "    has_changes = False\n",
-    "    if stage_all and repo.is_dirty(untracked_files=True):\n",
-    "        print(\"Staging all changes\")\n",
-    "        repo.git.add('.')\n",
-    "        has_changes = True\n",
-    "    elif repo.is_dirty(untracked_files=False):\n",
-    "        print(\"Staging changes to tracked files\")\n",
-    "        repo.git.add('-u')\n",
-    "        has_changes = True\n",
-    "    \n",
-    "    # Check if there are uncommitted changes\n",
-    "    if has_changes:\n",
-    "        # Default commit message if none provided\n",
-    "        if commit_message is None:\n",
-    "            commit_message = f\"Experiment: {experiment_name}\"\n",
-    "        \n",
-    "        # Commit changes\n",
-    "        commit = repo.index.commit(commit_message)\n",
-    "        commit_hash = commit.hexsha\n",
-    "        print(f\"Changes committed with hash: {commit_hash[:8]}\")\n",
-    "    else:\n",
-    "        # No changes to commit, use current HEAD\n",
-    "        commit_hash = repo.head.commit.hexsha\n",
-    "        print(\"No changes detected, nothing to commit\")\n",
-    "    \n",
-    "    # Format the branch/tag name\n",
-    "    version_name = f\"ragas/{experiment_name}\"\n",
-    "    \n",
-    "    # Create branch if requested\n",
-    "    if create_branch:\n",
-    "        branch = repo.create_head(version_name, commit_hash)\n",
-    "        print(f\"Created branch: {version_name}\")\n",
-    "    \n",
-    "    return commit_hash"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 19,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "def cleanup_experiment_branches(\n",
-    "    prefix: str = \"ragas/\", \n",
-    "    repo_path: t.Union[str, Path, None] = None,\n",
-    "    interactive: bool = True,\n",
-    "    dry_run: bool = False\n",
-    ") -> t.List[str]:\n",
-    "    \"\"\"Clean up git branches with the specified prefix.\"\"\"\n",
-    "    # Find the git repository root if not provided\n",
-    "    if repo_path is None:\n",
-    "        try:\n",
-    "            repo_path = find_git_root()\n",
-    "        except ValueError as e:\n",
-    "            raise ValueError(f\"Cannot cleanup branches: {str(e)}\")\n",
-    "    \n",
-    "    # Initialize git repo object\n",
-    "    repo = git.Repo(repo_path)\n",
-    "    current_branch = repo.active_branch.name\n",
-    "    \n",
-    "    # Get all branches matching the prefix\n",
-    "    matching_branches = []\n",
-    "    for branch in repo.branches:\n",
-    "        if branch.name.startswith(prefix):\n",
-    "            matching_branches.append(branch.name)\n",
-    "    \n",
-    "    if not matching_branches:\n",
-    "        print(f\"No branches found with prefix '{prefix}'\")\n",
-    "        return []\n",
-    "    \n",
-    "    # Remove current branch from the list if present\n",
-    "    if current_branch in matching_branches:\n",
-    "        print(f\"Note: Current branch '{current_branch}' will be excluded from deletion\")\n",
-    "        matching_branches.remove(current_branch)\n",
-    "        \n",
-    "    if not matching_branches:\n",
-    "        print(\"No branches available for deletion after excluding current branch\")\n",
-    "        return []\n",
-    "    \n",
-    "    # Show branches to the user\n",
-    "    print(f\"Found {len(matching_branches)} branches with prefix '{prefix}':\")\n",
-    "    for branch_name in matching_branches:\n",
-    "        print(f\"- {branch_name}\")\n",
-    "    \n",
-    "    # Handle confirmation in interactive mode\n",
-    "    proceed = True\n",
-    "    if interactive and not dry_run:\n",
-    "        confirm = input(f\"\\nDelete these {len(matching_branches)} branches? (y/n): \").strip().lower()\n",
-    "        proceed = (confirm == 'y')\n",
-    "    \n",
-    "    if not proceed:\n",
-    "        print(\"Operation cancelled\")\n",
-    "        return []\n",
-    "    \n",
-    "    # Perform deletion\n",
-    "    deleted_branches = []\n",
-    "    for branch_name in matching_branches:\n",
-    "        if dry_run:\n",
-    "            print(f\"Would delete branch: {branch_name}\")\n",
-    "            deleted_branches.append(branch_name)\n",
-    "        else:\n",
-    "            try:\n",
-    "                # Delete the branch\n",
-    "                repo.git.branch('-D', branch_name)\n",
-    "                print(f\"Deleted branch: {branch_name}\")\n",
-    "                deleted_branches.append(branch_name)\n",
-    "            except git.GitCommandError as e:\n",
-    "                print(f\"Error deleting branch '{branch_name}': {str(e)}\")\n",
-    "    \n",
-    "    if dry_run:\n",
-    "        print(f\"\\nDry run complete. {len(deleted_branches)} branches would be deleted.\")\n",
-    "    else:\n",
-    "        print(f\"\\nCleanup complete. {len(deleted_branches)} branches deleted.\")\n",
-    "    \n",
-    "    return deleted_branches"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No branches found with prefix 'ragas/'\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "[]"
-      ]
-     },
-     "execution_count": 20,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "cleanup_experiment_branches(dry_run=True)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Experiment Wrapper"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export \n",
-    "@t.runtime_checkable\n",
-    "class ExperimentProtocol(t.Protocol):\n",
-    "    async def __call__(self, *args, **kwargs): ...\n",
-    "    async def run_async(self, name: str, dataset: Dataset): ..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "from ragas_experimental.project.naming import MemorableNames"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 31,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "memorable_names = MemorableNames()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def experiment(\n",
-    "    self: Project, \n",
-    "    experiment_model, \n",
-    "    name_prefix: str = \"\", \n",
-    "    save_to_git: bool = False, \n",
-    "    stage_all: bool = False,\n",
-    "    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None\n",
-    "):\n",
-    "    \"\"\"Decorator for creating experiment functions.\n",
-    "\n",
-    "    Args:\n",
-    "        experiment_model: The model type to use for experiment results\n",
-    "        name_prefix: Optional prefix for experiment names\n",
-    "        save_to_git: Whether to save experiment state to git\n",
-    "        stage_all: Whether to stage all files when saving to git\n",
-    "        backend: Backend to use for this experiment (overrides project's backend)\n",
-    "\n",
-    "    Returns:\n",
-    "        Decorator function that wraps experiment functions\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def decorator(func: t.Callable) -> ExperimentProtocol:\n",
-    "        @wraps(func)\n",
-    "        async def wrapped_experiment(*args, **kwargs):\n",
-    "            # Simply call the function\n",
-    "            return await func(*args, **kwargs)\n",
-    "\n",
-    "        # Add run method to the wrapped function\n",
-    "        async def run_async(\n",
-    "            dataset: Dataset, \n",
-    "            name: t.Optional[str] = None, \n",
-    "            save_to_git: bool = save_to_git, \n",
-    "            stage_all: bool = stage_all,\n",
-    "            backend: t.Optional[rt.SUPPORTED_BACKENDS] = backend\n",
-    "        ):\n",
-    "            # If name is not provided, generate a memorable name\n",
-    "            if name is None:\n",
-    "                name = memorable_names.generate_unique_name()\n",
-    "            if name_prefix:\n",
-    "                name = f\"{name_prefix}-{name}\"\n",
-    "\n",
-    "            # Determine which backend to use (parameter > decorator > project default)\n",
-    "            effective_backend = backend if backend is not None else self.backend\n",
-    "\n",
-    "            experiment_view = None\n",
-    "            try:\n",
-    "                # Create the experiment view using the specified backend\n",
-    "                experiment_view = self.create_experiment(\n",
-    "                    name=name, \n",
-    "                    model=experiment_model,\n",
-    "                    backend=effective_backend\n",
-    "                )\n",
-    "                \n",
-    "                # Create tasks for all items\n",
-    "                tasks = []\n",
-    "                for item in dataset:\n",
-    "                    tasks.append(wrapped_experiment(item))\n",
-    "\n",
-    "                # Calculate total operations (processing + appending)\n",
-    "                total_operations = len(tasks) * 2  # Each item requires processing and appending\n",
-    "                \n",
-    "                # Use tqdm for combined progress tracking\n",
-    "                results = []\n",
-    "                progress_bar = tqdm(total=total_operations, desc=\"Running experiment\")\n",
-    "                \n",
-    "                # Process all items\n",
-    "                for future in asyncio.as_completed(tasks):\n",
-    "                    result = await future\n",
-    "                    if result is not None:\n",
-    "                        results.append(result)\n",
-    "                    progress_bar.update(1)  # Update for task completion\n",
-    "                \n",
-    "                # Append results to experiment view\n",
-    "                for result in results:\n",
-    "                    experiment_view.append(result)\n",
-    "                    progress_bar.update(1)  # Update for append operation\n",
-    "                    \n",
-    "                progress_bar.close()\n",
-    "                \n",
-    "            except Exception as e:\n",
-    "                # Clean up the experiment if there was an error and it was created\n",
-    "                if experiment_view is not None:\n",
-    "                    try:\n",
-    "                        if effective_backend == \"ragas_app\" and hasattr(self, \"_ragas_api_client\"):\n",
-    "                            # Delete the experiment in Ragas App\n",
-    "                            sync_version = async_to_sync(self._ragas_api_client.delete_experiment)\n",
-    "                            sync_version(project_id=self.project_id, experiment_id=experiment_view.experiment_id)\n",
-    "                        elif effective_backend == \"local\":\n",
-    "                            # Delete the local file\n",
-    "                            experiment_path = self.get_experiment_path(experiment_view.name)\n",
-    "                            if os.path.exists(experiment_path):\n",
-    "                                os.remove(experiment_path)\n",
-    "                        # Could add more backend-specific cleanup here\n",
-    "                    except Exception as cleanup_error:\n",
-    "                        print(f\"Failed to clean up experiment after error: {cleanup_error}\")\n",
-    "                \n",
-    "                # Re-raise the original exception\n",
-    "                raise e\n",
-    "\n",
-    "            # save to git if requested\n",
-    "            if save_to_git:\n",
-    "                repo_path = find_git_root()\n",
-    "                version_experiment(experiment_name=name, repo_path=repo_path, stage_all=stage_all)\n",
-    "\n",
-    "            return experiment_view\n",
-    "\n",
-    "        wrapped_experiment.__setattr__(\"run_async\", run_async)\n",
-    "        return t.cast(ExperimentProtocol, wrapped_experiment)\n",
-    "\n",
-    "    return decorator"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def langfuse_experiment(\n",
-    "    self: Project, experiment_model, name_prefix: str = \"\", \n",
-    "    save_to_git: bool = True, stage_all: bool = True\n",
-    "):\n",
-    "    \"\"\"Decorator for creating experiment functions with Langfuse integration.\n",
-    "\n",
-    "    Args:\n",
-    "        experiment_model: The NotionModel type to use for experiment results\n",
-    "        name_prefix: Optional prefix for experiment names\n",
-    "        save_to_git: Whether to save the experiment state to git\n",
-    "        stage_all: Whether to stage all files when saving to git\n",
-    "\n",
-    "    Returns:\n",
-    "        Decorator function that wraps experiment functions with Langfuse observation\n",
-    "    \"\"\"\n",
-    "    # Use the project's backend as the source of truth\n",
-    "    backend = self.backend\n",
-    "\n",
-    "    def decorator(func: t.Callable) -> ExperimentProtocol:\n",
-    "        @wraps(func)\n",
-    "        async def langfuse_wrapped_func(*args, **kwargs):\n",
-    "            # Apply langfuse observation directly here\n",
-    "            trace_name = f\"{name_prefix}-{func.__name__}\" if name_prefix else func.__name__\n",
-    "            observed_func = observe(name=trace_name)(func)\n",
-    "            return await observed_func(*args, **kwargs)\n",
-    "        \n",
-    "        # Now create the experiment wrapper with our already-observed function\n",
-    "        experiment_wrapper = self.experiment(experiment_model, name_prefix, save_to_git, stage_all)(langfuse_wrapped_func)\n",
-    "        \n",
-    "        return t.cast(ExperimentProtocol, experiment_wrapper)\n",
-    "\n",
-    "    return decorator"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "\n",
-    "# this one we have to clean up\n",
-    "from langfuse.decorators import observe"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def langfuse_experiment(\n",
-    "    self: Project, \n",
-    "    experiment_model, \n",
-    "    name_prefix: str = \"\", \n",
-    "    save_to_git: bool = True, \n",
-    "    stage_all: bool = True,\n",
-    "    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None\n",
-    "):\n",
-    "    \"\"\"Decorator for creating experiment functions with Langfuse integration.\n",
-    "\n",
-    "    Args:\n",
-    "        experiment_model: The model type to use for experiment results\n",
-    "        name_prefix: Optional prefix for experiment names\n",
-    "        save_to_git: Whether to save experiment state to git\n",
-    "        stage_all: Whether to stage all files when saving to git\n",
-    "        backend: Backend to use for this experiment (overrides project's backend)\n",
-    "\n",
-    "    Returns:\n",
-    "        Decorator function that wraps experiment functions with Langfuse observation\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def decorator(func: t.Callable) -> ExperimentProtocol:\n",
-    "        @wraps(func)\n",
-    "        async def langfuse_wrapped_func(*args, **kwargs):\n",
-    "            # Apply langfuse observation directly here\n",
-    "            trace_name = f\"{name_prefix}-{func.__name__}\" if name_prefix else func.__name__\n",
-    "            observed_func = observe(name=trace_name)(func)\n",
-    "            return await observed_func(*args, **kwargs)\n",
-    "        \n",
-    "        # Now create the experiment wrapper with our already-observed function\n",
-    "        experiment_wrapper = self.experiment(\n",
-    "            experiment_model, \n",
-    "            name_prefix, \n",
-    "            save_to_git, \n",
-    "            stage_all,\n",
-    "            backend=backend\n",
-    "        )(langfuse_wrapped_func)\n",
-    "        \n",
-    "        return t.cast(ExperimentProtocol, experiment_wrapper)\n",
-    "\n",
-    "    return decorator"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "# import langfuse\n",
-    "from langfuse import Langfuse"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "langfuse = Langfuse(\n",
-    "  secret_key=os.getenv(\"LANGFUSE_SECRET_KEY\"),\n",
-    "  public_key=os.getenv(\"LANGFUSE_PUBLIC_KEY\"),\n",
-    "  host=\"https://us.cloud.langfuse.com\"\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "@p.langfuse_experiment(TextExperimentModel)\n",
-    "async def test_experiment(item: TestModel):\n",
-    "    return TextExperimentModel(**item.model_dump(), response=\"test response\", is_correct=\"yes\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "@patch\n",
-    "def mlflow_experiment(\n",
-    "    self: Project, experiment_model, name_prefix: str = \"\",\n",
-    "    save_to_git: bool = True, stage_all: bool = True\n",
-    "):\n",
-    "    \"\"\"Decorator for creating experiment functions with mlflow integration.\n",
-    "\n",
-    "    Args:\n",
-    "        experiment_model: The NotionModel type to use for experiment results\n",
-    "        name_prefix: Optional prefix for experiment names\n",
-    "        save_to_git: Whether to save the experiment state to git\n",
-    "        stage_all: Whether to stage all files when saving to git\n",
-    "\n",
-    "    Returns:\n",
-    "        Decorator function that wraps experiment functions with mlflow observation\n",
-    "    \"\"\"\n",
-    "    # Use the project's backend as the source of truth\n",
-    "    backend = self.backend\n",
-    "\n",
-    "    def decorator(func: t.Callable) -> ExperimentProtocol:\n",
-    "        \n",
-    "        @wraps(func)\n",
-    "        async def mlflow_wrapped_func(*args, **kwargs):\n",
-    "            # Apply mlflow observation directly here\n",
-    "            trace_name = f\"{name_prefix}-{func.__name__}\" if name_prefix else func.__name__\n",
-    "            observed_func = trace(name=trace_name)(func)\n",
-    "            return await observed_func(*args, **kwargs)\n",
-    "        \n",
-    "        # Now create the experiment wrapper with our already-observed function\n",
-    "        experiment_wrapper = self.experiment(\n",
-    "            experiment_model, name_prefix, save_to_git, stage_all\n",
-    "        )(mlflow_wrapped_func)\n",
-    "        \n",
-    "        return t.cast(ExperimentProtocol, experiment_wrapper)\n",
-    "\n",
-    "    return decorator"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Running experiment: 100%|██████████| 6/6 [00:01<00:00,  4.01it/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "Experiment(name=cool_matsumoto, model=TextExperimentModel)"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await test_experiment.run_async(test_dataset)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Compare and Plot"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Note: Example code failed - this is expected during docs building: name 'get_test_directory' is not defined\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Example of using experiments with a local backend\n",
-    "import tempfile\n",
-    "import os\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "# Define a test model for our example\n",
-    "class LocalExperimentModel(BaseModel):\n",
-    "    id: int\n",
-    "    name: str\n",
-    "    description: str\n",
-    "    score: float\n",
-    "    experiment_result: str\n",
-    "\n",
-    "# The function we want to run as an experiment\n",
-    "async def process_item(item):\n",
-    "    # This would be your actual experiment logic\n",
-    "    return LocalExperimentModel(\n",
-    "        id=item.id,\n",
-    "        name=item.name,\n",
-    "        description=item.description,\n",
-    "        score=float(item.id) * 0.1,\n",
-    "        experiment_result=f\"Result for {item.name}\"\n",
-    "    )\n",
-    "\n",
-    "# Example code using local backend (will be skipped during docs build)\n",
-    "try:\n",
-    "    # Create a temporary directory for demonstration\n",
-    "    test_dir = get_test_directory()\n",
-    "    \n",
-    "    # Create a new project with local backend\n",
-    "    local_project = Project.create(\n",
-    "        name=\"test_local_experiment_project\",\n",
-    "        description=\"A test project using local backend for experiments\",\n",
-    "        backend=\"local\",\n",
-    "        root_dir=test_dir\n",
-    "    )\n",
-    "    \n",
-    "    # Define a test model for the dataset\n",
-    "    class LocalDatasetModel(BaseModel):\n",
-    "        id: int\n",
-    "        name: str\n",
-    "        description: str\n",
-    "    \n",
-    "    # Create a dataset with local backend\n",
-    "    local_dataset = local_project.create_dataset(\n",
-    "        model=LocalDatasetModel,\n",
-    "        name=\"test_experiment_dataset\"\n",
-    "    )\n",
-    "    \n",
-    "    # Add some entries to the dataset\n",
-    "    for i in range(3):\n",
-    "        entry = LocalDatasetModel(\n",
-    "            id=i,\n",
-    "            name=f\"Test Item {i}\",\n",
-    "            description=f\"Description for test item {i}\"\n",
-    "        )\n",
-    "        local_dataset.append(entry)\n",
-    "    \n",
-    "    # Create an experiment function\n",
-    "    @local_project.experiment(LocalExperimentModel)\n",
-    "    async def test_local_experiment(item):\n",
-    "        return await process_item(item)\n",
-    "    \n",
-    "    # Run the experiment\n",
-    "    experiment = await test_local_experiment.run_async(local_dataset)\n",
-    "    \n",
-    "    # Check that the experiment file exists\n",
-    "    experiment_path = local_project.get_experiment_path(experiment.name)\n",
-    "    print(f\"Experiment file exists: {os.path.exists(experiment_path)}\")\n",
-    "    \n",
-    "    # List experiments\n",
-    "    experiments = local_project.list_experiment_names()\n",
-    "    print(f\"Experiments in project: {experiments}\")\n",
-    "    \n",
-    "    # Get the experiment\n",
-    "    retrieved_experiment = local_project.get_experiment(\n",
-    "        experiment_name=experiment.name,\n",
-    "        model=LocalExperimentModel\n",
-    "    )\n",
-    "    \n",
-    "    # Load the experiment results\n",
-    "    retrieved_experiment.load()\n",
-    "    print(f\"Retrieved experiment: {retrieved_experiment}\")\n",
-    "    \n",
-    "    # Convert to DataFrame\n",
-    "    df = retrieved_experiment.to_pandas()\n",
-    "    print(\"\\nExperiment results as DataFrame:\")\n",
-    "    print(df)\n",
-    "\n",
-    "except Exception as e:\n",
-    "    print(f\"Note: Example code failed - this is expected during docs building: {e}\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "from mlflow import trace\n",
-    "\n",
-    "@patch\n",
-    "def mlflow_experiment(\n",
-    "    self: Project, \n",
-    "    experiment_model, \n",
-    "    name_prefix: str = \"\",\n",
-    "    save_to_git: bool = True, \n",
-    "    stage_all: bool = True,\n",
-    "    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None\n",
-    "):\n",
-    "    \"\"\"Decorator for creating experiment functions with mlflow integration.\n",
-    "\n",
-    "    Args:\n",
-    "        experiment_model: The model type to use for experiment results\n",
-    "        name_prefix: Optional prefix for experiment names\n",
-    "        save_to_git: Whether to save experiment state to git\n",
-    "        stage_all: Whether to stage all files when saving to git\n",
-    "        backend: Backend to use for this experiment (overrides project's backend)\n",
-    "\n",
-    "    Returns:\n",
-    "        Decorator function that wraps experiment functions with mlflow observation\n",
-    "    \"\"\"\n",
-    "\n",
-    "    def decorator(func: t.Callable) -> ExperimentProtocol:\n",
-    "        \n",
-    "        @wraps(func)\n",
-    "        async def mlflow_wrapped_func(*args, **kwargs):\n",
-    "            # Apply mlflow observation directly here\n",
-    "            trace_name = f\"{name_prefix}-{func.__name__}\" if name_prefix else func.__name__\n",
-    "            observed_func = trace(name=trace_name)(func)\n",
-    "            return await observed_func(*args, **kwargs)\n",
-    "        \n",
-    "        # Now create the experiment wrapper with our already-observed function\n",
-    "        experiment_wrapper = self.experiment(\n",
-    "            experiment_model, \n",
-    "            name_prefix, \n",
-    "            save_to_git, \n",
-    "            stage_all,\n",
-    "            backend=backend\n",
-    "        )(mlflow_wrapped_func)\n",
-    "        \n",
-    "        return t.cast(ExperimentProtocol, experiment_wrapper)\n",
-    "\n",
-    "    return decorator"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "\n",
-    "import logging\n",
-    "from ragas_experimental.utils import plot_experiments_as_subplots\n",
-    "\n",
-    "@patch\n",
-    "def compare_and_plot(self: Project, experiment_names: t.List[str], model: t.Type[BaseModel], metric_names: t.List[str]):\n",
-    "    \"\"\"Compare multiple experiments and generate a plot.\n",
-    "\n",
-    "    Args:\n",
-    "        experiment_names: List of experiment IDs to compare\n",
-    "        model: Model class defining the experiment structure\n",
-    "    \"\"\"\n",
-    "    results = {}\n",
-    "    for experiment_name in tqdm(experiment_names, desc=\"Fetching experiments\"):\n",
-    "        experiment = self.get_experiment(experiment_name, model)\n",
-    "        experiment.load()\n",
-    "        results[experiment_name] = {}\n",
-    "        for row in experiment:\n",
-    "            for metric in metric_names:\n",
-    "                if metric not in results[experiment_name]:\n",
-    "                    results[experiment_name][metric] = []\n",
-    "                if hasattr(row, metric):\n",
-    "                    results[experiment_name][metric].append(getattr(row, metric))\n",
-    "                else:\n",
-    "                    results[metric].append(None)\n",
-    "                    logging.warning(f\"Metric {metric} not found in row: {row}\")\n",
-    "                    \n",
-    "    \n",
-    "    \n",
-    "    fig = plot_experiments_as_subplots(results,experiment_ids=experiment_names)\n",
-    "    fig.show()\n",
-    "        \n",
-    "        \n",
-    "        \n",
-    "        \n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental import BaseModel\n",
-    "\n",
-    "class TestDataset(BaseModel):\n",
-    "    question: str\n",
-    "    citations: list[str]\n",
-    "    grading_notes: str\n",
-    "    \n",
-    "\n",
-    "class ExperimentModel(TestDataset):\n",
-    "    response: str\n",
-    "    score: str\n",
-    "    score_reason: str\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Fetching experiments: 100%|██████████| 2/2 [00:05<00:00,  2.60s/it]\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.plotly.v1+json": {
-       "config": {
-        "plotlyServerURL": "https://plot.ly"
-       },
-       "data": [
-        {
-         "hoverinfo": "text",
-         "hovertext": [
-          "Fail: 30.0%",
-          "Fail: 33.3%"
-         ],
-         "marker": {
-          "color": "#e11185"
-         },
-         "name": "Fail",
-         "showlegend": false,
-         "type": "bar",
-         "width": 0.5,
-         "x": [
-          "Exp 1",
-          "Exp 2"
-         ],
-         "xaxis": "x",
-         "y": [
-          30,
-          33.33333333333333
-         ],
-         "yaxis": "y"
-        },
-        {
-         "hoverinfo": "text",
-         "hovertext": [
-          "Pass: 70.0%",
-          "Pass: 66.7%"
-         ],
-         "marker": {
-          "color": "#1a1dc9"
-         },
-         "name": "Pass",
-         "showlegend": false,
-         "type": "bar",
-         "width": 0.5,
-         "x": [
-          "Exp 1",
-          "Exp 2"
-         ],
-         "xaxis": "x",
-         "y": [
-          70,
-          66.66666666666666
-         ],
-         "yaxis": "y"
-        }
-       ],
-       "layout": {
-        "annotations": [
-         {
-          "font": {
-           "size": 16
-          },
-          "showarrow": false,
-          "text": "Score Comparison",
-          "x": 0.5,
-          "xanchor": "center",
-          "xref": "paper",
-          "y": 1,
-          "yanchor": "bottom",
-          "yref": "paper"
-         }
-        ],
-        "barmode": "stack",
-        "height": 400,
-        "hovermode": "closest",
-        "margin": {
-         "b": 50,
-         "l": 50,
-         "r": 50,
-         "t": 80
-        },
-        "plot_bgcolor": "white",
-        "showlegend": false,
-        "template": {
-         "data": {
-          "bar": [
-           {
-            "error_x": {
-             "color": "#2a3f5f"
-            },
-            "error_y": {
-             "color": "#2a3f5f"
-            },
-            "marker": {
-             "line": {
-              "color": "#E5ECF6",
-              "width": 0.5
-             },
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "bar"
-           }
-          ],
-          "barpolar": [
-           {
-            "marker": {
-             "line": {
-              "color": "#E5ECF6",
-              "width": 0.5
-             },
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "barpolar"
-           }
-          ],
-          "carpet": [
-           {
-            "aaxis": {
-             "endlinecolor": "#2a3f5f",
-             "gridcolor": "white",
-             "linecolor": "white",
-             "minorgridcolor": "white",
-             "startlinecolor": "#2a3f5f"
-            },
-            "baxis": {
-             "endlinecolor": "#2a3f5f",
-             "gridcolor": "white",
-             "linecolor": "white",
-             "minorgridcolor": "white",
-             "startlinecolor": "#2a3f5f"
-            },
-            "type": "carpet"
-           }
-          ],
-          "choropleth": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "choropleth"
-           }
-          ],
-          "contour": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "contour"
-           }
-          ],
-          "contourcarpet": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "contourcarpet"
-           }
-          ],
-          "heatmap": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "heatmap"
-           }
-          ],
-          "histogram": [
-           {
-            "marker": {
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "histogram"
-           }
-          ],
-          "histogram2d": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "histogram2d"
-           }
-          ],
-          "histogram2dcontour": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "histogram2dcontour"
-           }
-          ],
-          "mesh3d": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "mesh3d"
-           }
-          ],
-          "parcoords": [
-           {
-            "line": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "parcoords"
-           }
-          ],
-          "pie": [
-           {
-            "automargin": true,
-            "type": "pie"
-           }
-          ],
-          "scatter": [
-           {
-            "fillpattern": {
-             "fillmode": "overlay",
-             "size": 10,
-             "solidity": 0.2
-            },
-            "type": "scatter"
-           }
-          ],
-          "scatter3d": [
-           {
-            "line": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatter3d"
-           }
-          ],
-          "scattercarpet": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattercarpet"
-           }
-          ],
-          "scattergeo": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattergeo"
-           }
-          ],
-          "scattergl": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattergl"
-           }
-          ],
-          "scattermap": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattermap"
-           }
-          ],
-          "scattermapbox": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattermapbox"
-           }
-          ],
-          "scatterpolar": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterpolar"
-           }
-          ],
-          "scatterpolargl": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterpolargl"
-           }
-          ],
-          "scatterternary": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterternary"
-           }
-          ],
-          "surface": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "surface"
-           }
-          ],
-          "table": [
-           {
-            "cells": {
-             "fill": {
-              "color": "#EBF0F8"
-             },
-             "line": {
-              "color": "white"
-             }
-            },
-            "header": {
-             "fill": {
-              "color": "#C8D4E3"
-             },
-             "line": {
-              "color": "white"
-             }
-            },
-            "type": "table"
-           }
-          ]
-         },
-         "layout": {
-          "annotationdefaults": {
-           "arrowcolor": "#2a3f5f",
-           "arrowhead": 0,
-           "arrowwidth": 1
-          },
-          "autotypenumbers": "strict",
-          "coloraxis": {
-           "colorbar": {
-            "outlinewidth": 0,
-            "ticks": ""
-           }
-          },
-          "colorscale": {
-           "diverging": [
-            [
-             0,
-             "#8e0152"
-            ],
-            [
-             0.1,
-             "#c51b7d"
-            ],
-            [
-             0.2,
-             "#de77ae"
-            ],
-            [
-             0.3,
-             "#f1b6da"
-            ],
-            [
-             0.4,
-             "#fde0ef"
-            ],
-            [
-             0.5,
-             "#f7f7f7"
-            ],
-            [
-             0.6,
-             "#e6f5d0"
-            ],
-            [
-             0.7,
-             "#b8e186"
-            ],
-            [
-             0.8,
-             "#7fbc41"
-            ],
-            [
-             0.9,
-             "#4d9221"
-            ],
-            [
-             1,
-             "#276419"
-            ]
-           ],
-           "sequential": [
-            [
-             0,
-             "#0d0887"
-            ],
-            [
-             0.1111111111111111,
-             "#46039f"
-            ],
-            [
-             0.2222222222222222,
-             "#7201a8"
-            ],
-            [
-             0.3333333333333333,
-             "#9c179e"
-            ],
-            [
-             0.4444444444444444,
-             "#bd3786"
-            ],
-            [
-             0.5555555555555556,
-             "#d8576b"
-            ],
-            [
-             0.6666666666666666,
-             "#ed7953"
-            ],
-            [
-             0.7777777777777778,
-             "#fb9f3a"
-            ],
-            [
-             0.8888888888888888,
-             "#fdca26"
-            ],
-            [
-             1,
-             "#f0f921"
-            ]
-           ],
-           "sequentialminus": [
-            [
-             0,
-             "#0d0887"
-            ],
-            [
-             0.1111111111111111,
-             "#46039f"
-            ],
-            [
-             0.2222222222222222,
-             "#7201a8"
-            ],
-            [
-             0.3333333333333333,
-             "#9c179e"
-            ],
-            [
-             0.4444444444444444,
-             "#bd3786"
-            ],
-            [
-             0.5555555555555556,
-             "#d8576b"
-            ],
-            [
-             0.6666666666666666,
-             "#ed7953"
-            ],
-            [
-             0.7777777777777778,
-             "#fb9f3a"
-            ],
-            [
-             0.8888888888888888,
-             "#fdca26"
-            ],
-            [
-             1,
-             "#f0f921"
-            ]
-           ]
-          },
-          "colorway": [
-           "#636efa",
-           "#EF553B",
-           "#00cc96",
-           "#ab63fa",
-           "#FFA15A",
-           "#19d3f3",
-           "#FF6692",
-           "#B6E880",
-           "#FF97FF",
-           "#FECB52"
-          ],
-          "font": {
-           "color": "#2a3f5f"
-          },
-          "geo": {
-           "bgcolor": "white",
-           "lakecolor": "white",
-           "landcolor": "#E5ECF6",
-           "showlakes": true,
-           "showland": true,
-           "subunitcolor": "white"
-          },
-          "hoverlabel": {
-           "align": "left"
-          },
-          "hovermode": "closest",
-          "mapbox": {
-           "style": "light"
-          },
-          "paper_bgcolor": "white",
-          "plot_bgcolor": "#E5ECF6",
-          "polar": {
-           "angularaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "bgcolor": "#E5ECF6",
-           "radialaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           }
-          },
-          "scene": {
-           "xaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           },
-           "yaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           },
-           "zaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           }
-          },
-          "shapedefaults": {
-           "line": {
-            "color": "#2a3f5f"
-           }
-          },
-          "ternary": {
-           "aaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "baxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "bgcolor": "#E5ECF6",
-           "caxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           }
-          },
-          "title": {
-           "x": 0.05
-          },
-          "xaxis": {
-           "automargin": true,
-           "gridcolor": "white",
-           "linecolor": "white",
-           "ticks": "",
-           "title": {
-            "standoff": 15
-           },
-           "zerolinecolor": "white",
-           "zerolinewidth": 2
-          },
-          "yaxis": {
-           "automargin": true,
-           "gridcolor": "white",
-           "linecolor": "white",
-           "ticks": "",
-           "title": {
-            "standoff": 15
-           },
-           "zerolinecolor": "white",
-           "zerolinewidth": 2
-          }
-         }
-        },
-        "title": {
-         "text": "Experiment Comparison by Metrics"
-        },
-        "width": 400,
-        "xaxis": {
-         "anchor": "y",
-         "domain": [
-          0,
-          1
-         ],
-         "linecolor": "black",
-         "linewidth": 1,
-         "showgrid": false,
-         "showline": true,
-         "tickangle": 0,
-         "title": {
-          "text": "Experiments"
-         }
-        },
-        "yaxis": {
-         "anchor": "x",
-         "domain": [
-          0,
-          1
-         ],
-         "gridcolor": "lightgray",
-         "linecolor": "black",
-         "linewidth": 1,
-         "range": [
-          0,
-          105
-         ],
-         "showgrid": true,
-         "showline": true,
-         "ticksuffix": "%",
-         "title": {
-          "text": "Percentage (%)"
-         }
-        }
-       }
-      }
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "p.compare_and_plot(\n",
-    "    experiment_names=[\"xenodochial_hoare\",\"confident_liskov\"],\n",
-    "    model=ExperimentModel,\n",
-    "    metric_names=[\"score\"]\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/project/naming.ipynb b/experimental/old_nbs/api/project/naming.ipynb
deleted file mode 100644
index dac4d28db..000000000
--- a/experimental/old_nbs/api/project/naming.ipynb
+++ /dev/null
@@ -1,315 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Memorable Namer\n",
-    "> A helper module to create fun, memorable names for experiments, datasets or anything\n",
-    "\n",
-    "Experiment Namer is a lightweight Python module that generates memorable, quirky names for your experiments or projects. It pairs whimsical adjectives with names of influential computer scientists and tech entrepreneurs to create distinctive identifiers like \"elegant_turing\" or \"bold_hopper\". Perfect for when you need to label multiple experiments without overthinking it, while still keeping them easily distinguishable and fun to reference."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp project.naming"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "import random"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "class MemorableNames:\n",
-    "    def __init__(self):\n",
-    "        # List of adjectives (similar to what Docker uses)\n",
-    "        self.adjectives = [\n",
-    "            \"admiring\",\n",
-    "            \"adoring\",\n",
-    "            \"affectionate\",\n",
-    "            \"agitated\",\n",
-    "            \"amazing\",\n",
-    "            \"angry\",\n",
-    "            \"awesome\",\n",
-    "            \"blissful\",\n",
-    "            \"bold\",\n",
-    "            \"boring\",\n",
-    "            \"brave\",\n",
-    "            \"busy\",\n",
-    "            \"charming\",\n",
-    "            \"clever\",\n",
-    "            \"cool\",\n",
-    "            \"compassionate\",\n",
-    "            \"competent\",\n",
-    "            \"condescending\",\n",
-    "            \"confident\",\n",
-    "            \"cranky\",\n",
-    "            \"crazy\",\n",
-    "            \"dazzling\",\n",
-    "            \"determined\",\n",
-    "            \"distracted\",\n",
-    "            \"dreamy\",\n",
-    "            \"eager\",\n",
-    "            \"ecstatic\",\n",
-    "            \"elastic\",\n",
-    "            \"elated\",\n",
-    "            \"elegant\",\n",
-    "            \"eloquent\",\n",
-    "            \"epic\",\n",
-    "            \"fervent\",\n",
-    "            \"festive\",\n",
-    "            \"flamboyant\",\n",
-    "            \"focused\",\n",
-    "            \"friendly\",\n",
-    "            \"frosty\",\n",
-    "            \"gallant\",\n",
-    "            \"gifted\",\n",
-    "            \"goofy\",\n",
-    "            \"gracious\",\n",
-    "            \"happy\",\n",
-    "            \"hardcore\",\n",
-    "            \"heuristic\",\n",
-    "            \"hopeful\",\n",
-    "            \"hungry\",\n",
-    "            \"infallible\",\n",
-    "            \"inspiring\",\n",
-    "            \"jolly\",\n",
-    "            \"jovial\",\n",
-    "            \"keen\",\n",
-    "            \"kind\",\n",
-    "            \"laughing\",\n",
-    "            \"loving\",\n",
-    "            \"lucid\",\n",
-    "            \"magical\",\n",
-    "            \"mystifying\",\n",
-    "            \"modest\",\n",
-    "            \"musing\",\n",
-    "            \"naughty\",\n",
-    "            \"nervous\",\n",
-    "            \"nifty\",\n",
-    "            \"nostalgic\",\n",
-    "            \"objective\",\n",
-    "            \"optimistic\",\n",
-    "            \"peaceful\",\n",
-    "            \"pedantic\",\n",
-    "            \"pensive\",\n",
-    "            \"practical\",\n",
-    "            \"priceless\",\n",
-    "            \"quirky\",\n",
-    "            \"quizzical\",\n",
-    "            \"relaxed\",\n",
-    "            \"reverent\",\n",
-    "            \"romantic\",\n",
-    "            \"sad\",\n",
-    "            \"serene\",\n",
-    "            \"sharp\",\n",
-    "            \"silly\",\n",
-    "            \"sleepy\",\n",
-    "            \"stoic\",\n",
-    "            \"stupefied\",\n",
-    "            \"suspicious\",\n",
-    "            \"sweet\",\n",
-    "            \"tender\",\n",
-    "            \"thirsty\",\n",
-    "            \"trusting\",\n",
-    "            \"upbeat\",\n",
-    "            \"vibrant\",\n",
-    "            \"vigilant\",\n",
-    "            \"vigorous\",\n",
-    "            \"wizardly\",\n",
-    "            \"wonderful\",\n",
-    "            \"xenodochial\",\n",
-    "            \"youthful\",\n",
-    "            \"zealous\",\n",
-    "            \"zen\",\n",
-    "        ]\n",
-    "\n",
-    "        # List of influential computer scientists and tech entrepreneurs\n",
-    "        self.scientists = [\n",
-    "            \"turing\",\n",
-    "            \"hopper\",\n",
-    "            \"knuth\",\n",
-    "            \"torvalds\",\n",
-    "            \"ritchie\",\n",
-    "            \"thompson\",\n",
-    "            \"dijkstra\",\n",
-    "            \"kay\",\n",
-    "            \"wozniak\",\n",
-    "            \"gates\",\n",
-    "            \"jobs\",\n",
-    "            \"musk\",\n",
-    "            \"bezos\",\n",
-    "            \"lovelace\",\n",
-    "            \"berners_lee\",\n",
-    "            \"cerf\",\n",
-    "            \"gosling\",\n",
-    "            \"kernighan\",\n",
-    "            \"lamport\",\n",
-    "            \"mccarthy\",\n",
-    "            \"minsky\",\n",
-    "            \"rossum\",\n",
-    "            \"backus\",\n",
-    "            \"engelbart\",\n",
-    "            \"hamilton\",\n",
-    "            \"chomsky\",\n",
-    "            \"shannon\",\n",
-    "            \"zuckerberg\",\n",
-    "            \"page\",\n",
-    "            \"brin\",\n",
-    "            \"matsumoto\",\n",
-    "            \"stallman\",\n",
-    "            \"stroustrup\",\n",
-    "            \"cook\",\n",
-    "            \"neumann\",\n",
-    "            \"babbage\",\n",
-    "            \"tanenbaum\",\n",
-    "            \"rivest\",\n",
-    "            \"shamir\",\n",
-    "            \"adleman\",\n",
-    "            \"carmack\",\n",
-    "            \"andreessen\",\n",
-    "            \"ullman\",\n",
-    "            \"postel\",\n",
-    "            \"huffman\",\n",
-    "            \"boole\",\n",
-    "            \"curry\",\n",
-    "            \"liskov\",\n",
-    "            \"wing\",\n",
-    "            \"goldwasser\",\n",
-    "            \"hoare\",\n",
-    "            \"milner\",\n",
-    "            \"perlis\",\n",
-    "            \"sutherland\",\n",
-    "            \"tarjan\",\n",
-    "            \"valiant\",\n",
-    "            \"yao\",\n",
-    "            \"hopcroft\",\n",
-    "            \"naur\",\n",
-    "            \"wilkes\",\n",
-    "            \"codd\",\n",
-    "            \"diffie\",\n",
-    "            \"hellman\",\n",
-    "            \"pearl\",\n",
-    "            \"thiel\",\n",
-    "            \"narayen\",\n",
-    "            \"nadella\",\n",
-    "            \"pichai\",\n",
-    "            \"dorsey\",\n",
-    "        ]\n",
-    "\n",
-    "        self.used_names = set()\n",
-    "\n",
-    "    def generate_name(self):\n",
-    "        \"\"\"Generate a single experiment name.\"\"\"\n",
-    "        adjective = random.choice(self.adjectives)\n",
-    "        scientist = random.choice(self.scientists)\n",
-    "        return f\"{adjective}_{scientist}\"\n",
-    "\n",
-    "    def generate_unique_name(self):\n",
-    "        \"\"\"Generate a unique experiment name.\"\"\"\n",
-    "        attempts = 0\n",
-    "        max_attempts = 100  # Prevent infinite loops\n",
-    "\n",
-    "        while attempts < max_attempts:\n",
-    "            name = self.generate_name()\n",
-    "            if name not in self.used_names:\n",
-    "                self.used_names.add(name)\n",
-    "                return name\n",
-    "            attempts += 1\n",
-    "\n",
-    "        # If we exhaust our combinations, add a random suffix\n",
-    "        base_name = self.generate_name()\n",
-    "        unique_name = f\"{base_name}_{random.randint(1000, 9999)}\"\n",
-    "        self.used_names.add(unique_name)\n",
-    "        return unique_name\n",
-    "\n",
-    "    def generate_unique_names(self, count):\n",
-    "        \"\"\"Generate multiple unique experiment names.\"\"\"\n",
-    "        return [self.generate_unique_name() for _ in range(count)]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "['hardcore_liskov',\n",
-       " 'cranky_ritchie',\n",
-       " 'cool_zuckerberg',\n",
-       " 'competent_berners_lee',\n",
-       " 'serene_huffman',\n",
-       " 'awesome_engelbart',\n",
-       " 'reverent_valiant',\n",
-       " 'romantic_stallman',\n",
-       " 'zen_carmack',\n",
-       " 'musing_page']"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "mn = MemorableNames()\n",
-    "mn.generate_unique_names(10)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "6762"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# now many combinations are there?\n",
-    "len(mn.scientists) * len(mn.adjectives)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/prompt/base.ipynb b/experimental/old_nbs/api/prompt/base.ipynb
deleted file mode 100644
index e407d4fb6..000000000
--- a/experimental/old_nbs/api/prompt/base.ipynb
+++ /dev/null
@@ -1,226 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp prompt.base"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Base Class"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import typing as t\n",
-    "import re\n",
-    "\n",
-    "class Prompt:\n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        instruction: str,\n",
-    "        examples: t.Optional[t.List[t.Tuple[t.Dict, t.Dict]]] = None\n",
-    "    ):\n",
-    "        \"\"\"\n",
-    "        Create a simple prompt object.\n",
-    "        \n",
-    "        Parameters:\n",
-    "        -----------\n",
-    "        instruction : str\n",
-    "            The prompt instruction template with placeholders like {response}, {expected_answer}\n",
-    "        examples : Optional[List[Tuple[Dict, Dict]]]\n",
-    "            List of (input_dict, output_dict) pairs for few-shot learning\n",
-    "        \"\"\"\n",
-    "        self.instruction = instruction\n",
-    "        self.examples = []\n",
-    "        \n",
-    "        # Validate the instruction\n",
-    "        self._validate_instruction()\n",
-    "        \n",
-    "        # Add examples if provided\n",
-    "        if examples:\n",
-    "            for inputs, output in examples:\n",
-    "                self.add_example(inputs, output)\n",
-    "    \n",
-    "    def _validate_instruction(self):\n",
-    "        \"\"\"Ensure the instruction contains at least one placeholder.\"\"\"\n",
-    "        if not re.findall(r\"\\{(\\w+)\\}\", self.instruction):\n",
-    "            raise ValueError(\"Instruction must contain at least one placeholder like {response}\")\n",
-    "    \n",
-    "    def format(self, **kwargs) -> str:\n",
-    "        \"\"\"Format the prompt with the provided variables.\"\"\"\n",
-    "\n",
-    "        prompt_parts = []\n",
-    "        prompt_parts.append(self.instruction.format(**kwargs))\n",
-    "        prompt_parts.append(self._format_examples())\n",
-    "\n",
-    "        # Combine all parts\n",
-    "        return \"\\n\\n\".join(prompt_parts)\n",
-    "    \n",
-    "    def _format_examples(self) -> str:\n",
-    "        \n",
-    "        # Add examples in a simple format\n",
-    "        examples = []\n",
-    "        if self.examples:\n",
-    "            examples.append(\"Examples:\")\n",
-    "            for i, (inputs, output) in enumerate(self.examples, 1):\n",
-    "                example_input = \"\\n\".join([f\"{k}: {v}\" for k, v in inputs.items()])\n",
-    "                example_output = \"\\n\".join([f\"{k}: {v}\" for k, v in output.items()])\n",
-    "                \n",
-    "                examples.append(f\"Example {i}:\\nInput:\\n{example_input}\\nOutput:\\n{example_output}\")\n",
-    "        \n",
-    "        return \"\\n\\n\".join(examples) if examples else \"\"\n",
-    "        \n",
-    "    \n",
-    "    def add_example(self, inputs: t.Dict, output: t.Dict) -> None:\n",
-    "        \"\"\"\n",
-    "        Add an example to the prompt.\n",
-    "        \n",
-    "        Parameters:\n",
-    "        -----------\n",
-    "        inputs : Dict\n",
-    "            Dictionary of input values\n",
-    "        output : Dict\n",
-    "            Dictionary of output values\n",
-    "            \n",
-    "        Raises:\n",
-    "        -------\n",
-    "        TypeError\n",
-    "            If inputs or output is not a dictionary\n",
-    "        \"\"\"\n",
-    "        if not isinstance(inputs, dict):\n",
-    "            raise TypeError(f\"Expected inputs to be dict, got {type(inputs).__name__}\")\n",
-    "        if not isinstance(output, dict):\n",
-    "            raise TypeError(f\"Expected output to be dict, got {type(output).__name__}\")\n",
-    "            \n",
-    "        self.examples.append((inputs, output))\n",
-    "    \n",
-    "    def __str__(self) -> str:\n",
-    "        \"\"\"String representation showing the instruction.\"\"\"\n",
-    "        return f\"Prompt(instruction='{self.instruction}',\\n examples={self.examples})\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Example Usage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluate if given answer You can get a full refund if you miss your flight. is same as expected answer Refunds depend on ticket type; only refundable tickets qualify for full refunds.\n",
-      "\n",
-      "Examples:\n",
-      "\n",
-      "Example 1:\n",
-      "Input:\n",
-      "response: You can get a full refund if you miss your flight.\n",
-      "expected_answer: Refunds depend on ticket type; only refundable tickets qualify for full refunds.\n",
-      "Output:\n",
-      "score: fail\n",
-      "\n",
-      "Example 2:\n",
-      "Input:\n",
-      "response: Each passenger gets 1 free checked bag up to 23kg.\n",
-      "expected_answer: Each passenger gets 1 free checked bag up to 23kg.\n",
-      "Output:\n",
-      "score: pass\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Create a basic prompt\n",
-    "prompt = Prompt(\n",
-    "    instruction=\"Evaluate if given answer {response} is same as expected answer {expected_answer}\"\n",
-    ")\n",
-    "\n",
-    "# Add examples with dict inputs and dict outputs\n",
-    "prompt.add_example(\n",
-    "    {\n",
-    "        \"response\": \"You can get a full refund if you miss your flight.\",\n",
-    "        \"expected_answer\": \"Refunds depend on ticket type; only refundable tickets qualify for full refunds.\"\n",
-    "    },\n",
-    "    {\"score\": \"fail\"}\n",
-    ")\n",
-    "\n",
-    "prompt.add_example(\n",
-    "    {\n",
-    "        \"response\": \"Each passenger gets 1 free checked bag up to 23kg.\",\n",
-    "        \"expected_answer\": \"Each passenger gets 1 free checked bag up to 23kg.\"\n",
-    "    },\n",
-    "    {\"score\": \"pass\"}\n",
-    ")\n",
-    "\n",
-    "print(prompt.format(response=\"You can get a full refund if you miss your flight.\", expected_answer=\"Refunds depend on ticket type; only refundable tickets qualify for full refunds.\"))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Prompt(instruction='Evaluate if given answer {response} is same as expected answer {expected_answer}',\n",
-      " examples=Examples:\n",
-      "\n",
-      "Example 1:\n",
-      "Input:\n",
-      "response: You can get a full refund if you miss your flight.\n",
-      "expected_answer: Refunds depend on ticket type; only refundable tickets qualify for full refunds.\n",
-      "Output:\n",
-      "score: fail\n",
-      "\n",
-      "Example 2:\n",
-      "Input:\n",
-      "response: Each passenger gets 1 free checked bag up to 23kg.\n",
-      "expected_answer: Each passenger gets 1 free checked bag up to 23kg.\n",
-      "Output:\n",
-      "score: pass)\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(str(prompt))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/prompt/dynamic_few_shot.ipynb b/experimental/old_nbs/api/prompt/dynamic_few_shot.ipynb
deleted file mode 100644
index b418fe1a3..000000000
--- a/experimental/old_nbs/api/prompt/dynamic_few_shot.ipynb
+++ /dev/null
@@ -1,319 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp prompt.dynamic_few_shot"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Dynamic Few-Shot Learning"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import typing as t\n",
-    "import numpy as np\n",
-    "from abc import ABC, abstractmethod\n",
-    "\n",
-    "from ragas_experimental.prompt.base import Prompt\n",
-    "from ragas_experimental.embedding import BaseEmbedding\n",
-    "\n",
-    "class ExampleStore(ABC):\n",
-    "    @abstractmethod\n",
-    "    def get_examples(\n",
-    "        self, data: t.Dict, top_k: int = 5\n",
-    "    ) -> t.List[t.Tuple[t.Dict, t.Dict]]:\n",
-    "        \"\"\"Get top_k most similar examples to data.\"\"\"\n",
-    "        pass\n",
-    "\n",
-    "    @abstractmethod\n",
-    "    def add_example(self, inputs: t.Dict, output: t.Dict) -> None:\n",
-    "        \"\"\"Add an example to the store.\"\"\"\n",
-    "        pass\n",
-    "\n",
-    "\n",
-    "class InMemoryExampleStore(ExampleStore):\n",
-    "    def __init__(self, embedding_model=None):\n",
-    "        \"\"\"\n",
-    "        Initialize an in-memory example store with optional embedding model.\n",
-    "        \n",
-    "        Args:\n",
-    "            embedding_model: Model used to generate embeddings (OpenAI or similar)\n",
-    "        \"\"\"\n",
-    "        self.embedding_model = embedding_model\n",
-    "        self._examples: t.List[t.Tuple[t.Dict, t.Dict]] = []\n",
-    "        self._embeddings_list: t.List[t.List[float]] = []\n",
-    "    \n",
-    "    def _get_embedding(self, data: t.Dict) -> t.List[float]:\n",
-    "        \"\"\"Convert input dict to an embedding vector.\"\"\"\n",
-    "        if self.embedding_model is None:\n",
-    "            return []\n",
-    "        \n",
-    "        # Serialize the dictionary to text\n",
-    "        text = \"\\n\".join([f\"{k}: {v}\" for k, v in data.items()])\n",
-    "        return self.embedding_model.embed_text(text)\n",
-    "    \n",
-    "    def add_example(self, inputs: t.Dict, output: t.Dict) -> None:\n",
-    "        \"\"\"Add an example to the store with its embedding.\"\"\"\n",
-    "        if not isinstance(inputs, dict):\n",
-    "            raise TypeError(f\"Expected inputs to be dict, got {type(inputs).__name__}\")\n",
-    "        if not isinstance(output, dict):\n",
-    "            raise TypeError(f\"Expected output to be dict, got {type(output).__name__}\")\n",
-    "            \n",
-    "        self._examples.append((inputs, output))\n",
-    "        \n",
-    "        if self.embedding_model:\n",
-    "            embedding = self._get_embedding(inputs)\n",
-    "            self._embeddings_list.append(embedding)\n",
-    "    \n",
-    "    def get_examples(\n",
-    "        self, data: t.Dict, top_k: int = 5, threshold: float = 0.7\n",
-    "    ) -> t.List[t.Tuple[t.Dict, t.Dict]]:\n",
-    "        \"\"\"Get examples most similar to the input data.\"\"\"\n",
-    "        if not self._examples:\n",
-    "            return []\n",
-    "            \n",
-    "        if not self.embedding_model or not self._embeddings_list:\n",
-    "            # If no embedding model, return the most recent examples\n",
-    "            return self._examples[-top_k:]\n",
-    "        \n",
-    "        # Get embedding for the query\n",
-    "        query_embedding = self._get_embedding(data)\n",
-    "        \n",
-    "        # Find most similar examples\n",
-    "        indices = self._get_nearest_examples(\n",
-    "            query_embedding, self._embeddings_list, top_k, threshold\n",
-    "        )\n",
-    "        \n",
-    "        # Return the examples at those indices\n",
-    "        return [self._examples[i] for i in indices]\n",
-    "    \n",
-    "    def _get_nearest_examples(\n",
-    "        self,\n",
-    "        query_embedding: t.List[float],\n",
-    "        embeddings: t.List[t.List[float]],\n",
-    "        top_k: int = 3,\n",
-    "        threshold: float = 0.7,\n",
-    "    ) -> t.List[int]:\n",
-    "        \"\"\"Find indices of the nearest examples based on cosine similarity.\"\"\"\n",
-    "        # Convert to numpy arrays for efficient computation\n",
-    "        query = np.array(query_embedding)\n",
-    "        embed_matrix = np.array(embeddings)\n",
-    "        \n",
-    "        # Calculate cosine similarity\n",
-    "        similarities = np.dot(embed_matrix, query) / (\n",
-    "            np.linalg.norm(embed_matrix, axis=1) * np.linalg.norm(query) + 1e-8\n",
-    "        )\n",
-    "        \n",
-    "        # Get indices of similarities above threshold\n",
-    "        valid_indices = np.where(similarities >= threshold)[0]\n",
-    "        \n",
-    "        # Sort by similarity and get top-k\n",
-    "        if len(valid_indices) > 0:\n",
-    "            top_indices = valid_indices[np.argsort(similarities[valid_indices])[-top_k:]]\n",
-    "            # Convert numpy indices to Python ints\n",
-    "            return [int(idx) for idx in top_indices]\n",
-    "        \n",
-    "        # If no examples meet threshold, return most recent examples\n",
-    "        return list(range(max(0, len(embeddings) - top_k), len(embeddings)))\n",
-    "    \n",
-    "    def __len__(self):\n",
-    "        return len(self._examples)\n",
-    "\n",
-    "\n",
-    "\n",
-    "\n",
-    "class DynamicFewShotPrompt(Prompt):\n",
-    "    \n",
-    "    def __init__(\n",
-    "        self,\n",
-    "        prompt: Prompt,\n",
-    "        example_store: InMemoryExampleStore,\n",
-    "        num_examples: int = 3\n",
-    "    ):\n",
-    "        \n",
-    "        self.example_store = example_store\n",
-    "        super().__init__(prompt.instruction, prompt.examples)\n",
-    "        self.num_examples = num_examples\n",
-    "        \n",
-    "        for example in prompt.examples:\n",
-    "            self.example_store.add_example(*example)\n",
-    "    \n",
-    "    def format(self, **kwargs) -> str:\n",
-    "        \"\"\"Format the prompt with dynamically retrieved examples.\"\"\"\n",
-    "        prompt_parts = []\n",
-    "        \n",
-    "        # Add instruction with variables filled in\n",
-    "        prompt_parts.append(self.instruction.format(**kwargs))\n",
-    "        \n",
-    "        # Get dynamic examples if we have a store and inputs\n",
-    "        dynamic_examples = []\n",
-    "        if self.example_store and kwargs:\n",
-    "            dynamic_examples = self.example_store.get_examples(kwargs, self.num_examples)\n",
-    "        \n",
-    "        # Add examples in a simple format\n",
-    "        if dynamic_examples:\n",
-    "            prompt_parts.append(\"Examples:\")\n",
-    "            for i, (inputs, output) in enumerate(dynamic_examples, 1):\n",
-    "                example_input = \"\\n\".join([f\"{k}: {v}\" for k, v in inputs.items()])\n",
-    "                example_output = \"\\n\".join([f\"{k}: {v}\" for k, v in output.items()])\n",
-    "                \n",
-    "                prompt_parts.append(f\"Example {i}:\\nInput:\\n{example_input}\\nOutput:\\n{example_output}\")\n",
-    "        \n",
-    " \n",
-    "        \n",
-    "        # Combine all parts\n",
-    "        return \"\\n\\n\".join(prompt_parts)\n",
-    "    \n",
-    "    def add_example(self, inputs: t.Dict, output: t.Dict) -> None:\n",
-    "        \"\"\"\n",
-    "        Add an example to both the prompt and the example store.\n",
-    "        \n",
-    "        Parameters:\n",
-    "        -----------\n",
-    "        inputs : Dict\n",
-    "            Dictionary of input values\n",
-    "        output : Dict\n",
-    "            Dictionary of output values\n",
-    "            \n",
-    "        Raises:\n",
-    "        -------\n",
-    "        TypeError\n",
-    "            If inputs or output is not a dictionary\n",
-    "        \"\"\"\n",
-    "        if (inputs, output) not in self.examples:\n",
-    "            self.examples.append((inputs, output))\n",
-    "            \n",
-    "        # Add to example store\n",
-    "        if isinstance(self.example_store, ExampleStore) and (inputs, output) not in self.example_store._examples:\n",
-    "            self.example_store.add_example(inputs, output)\n",
-    "    \n",
-    "    @classmethod\n",
-    "    def from_prompt(\n",
-    "        cls,\n",
-    "        prompt: Prompt,\n",
-    "        embedding_model: BaseEmbedding,\n",
-    "        num_examples: int = 3\n",
-    "    ) -> \"DynamicFewShotPrompt\":\n",
-    "        \"\"\"Create a DynamicFewShotPrompt from a Prompt object.\"\"\"\n",
-    "        example_store = InMemoryExampleStore(embedding_model=embedding_model)\n",
-    "        \n",
-    "        few_shot_prompt = cls(\n",
-    "            prompt=prompt,\n",
-    "            example_store=example_store,\n",
-    "            num_examples=num_examples\n",
-    "        )\n",
-    "        \n",
-    "        return few_shot_prompt"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Example Usage"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluate if given answer Regularly updating your software reduces the risk of vulnerabilities. is same as expected answer Keeping software up to date helps patch known security flaws and prevents exploits.\n",
-      "\n",
-      "Examples:\n",
-      "\n",
-      "Example 1:\n",
-      "Input:\n",
-      "response: Using two-factor authentication greatly enhances account security.\n",
-      "expected_answer: Two-factor authentication adds a layer of protection by requiring a second form of identity verification.\n",
-      "Output:\n",
-      "score: fail\n"
-     ]
-    }
-   ],
-   "source": [
-    "#| eval: false\n",
-    "from ragas_experimental.embedding import ragas_embedding\n",
-    "from ragas_experimental.prompt import Prompt\n",
-    "from openai import OpenAI\n",
-    "\n",
-    "embedding = ragas_embedding(provider=\"openai\", client=OpenAI(),model=\"text-embedding-3-small\")\n",
-    "\n",
-    "# Create a basic prompt\n",
-    "prompt = Prompt(\n",
-    "    instruction=\"Evaluate if given answer {response} is same as expected answer {expected_answer}\"\n",
-    ")\n",
-    "\n",
-    "# Add examples with dict inputs and dict outputs\n",
-    "prompt.add_example(\n",
-    "    {\n",
-    "        \"response\": \"You can get a full refund if you miss your flight.\",\n",
-    "        \"expected_answer\": \"Refunds depend on ticket type; only refundable tickets qualify for full refunds.\"\n",
-    "    },\n",
-    "    {\"score\": \"fail\"}\n",
-    ")\n",
-    "\n",
-    "prompt = DynamicFewShotPrompt.from_prompt(\n",
-    "    prompt,\n",
-    "    embedding_model=embedding,\n",
-    "    num_examples=1\n",
-    ")\n",
-    "\n",
-    "prompt.add_example(\n",
-    "    {\n",
-    "        \"response\": \"Bananas are high in potassium and great for quick energy.\",\n",
-    "        \"expected_answer\": \"Bananas provide potassium and are a good source of fast-digesting carbohydrates.\"\n",
-    "    },\n",
-    "    {\"score\": \"pass\"}\n",
-    ")\n",
-    "\n",
-    "prompt.add_example(\n",
-    "    {\n",
-    "        \"response\": \"Using two-factor authentication greatly enhances account security.\",\n",
-    "        \"expected_answer\": \"Two-factor authentication adds a layer of protection by requiring a second form of identity verification.\"\n",
-    "    },\n",
-    "    {\"score\": \"fail\"}\n",
-    ")\n",
-    "\n",
-    "\n",
-    "prompt.example_store.get_examples(\n",
-    "{\n",
-    "        \"response\": \"Regularly updating your software reduces the risk of vulnerabilities.\",\n",
-    "        \"expected_answer\": \"Keeping software up to date helps patch known security flaws and prevents exploits.\"\n",
-    "    })\n",
-    "\n",
-    "print(prompt.format(**{\n",
-    "        \"response\": \"Regularly updating your software reduces the risk of vulnerabilities.\",\n",
-    "        \"expected_answer\": \"Keeping software up to date helps patch known security flaws and prevents exploits.\"\n",
-    "    }))"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/tracing/langfuse.ipynb b/experimental/old_nbs/api/tracing/langfuse.ipynb
deleted file mode 100644
index 465b113fa..000000000
--- a/experimental/old_nbs/api/tracing/langfuse.ipynb
+++ /dev/null
@@ -1,183 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Langfuse Tracing\n",
-    "\n",
-    "> Utils to help with interact with langfuse traces"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | default_exp tracing.langfuse"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "import typing as t\n",
-    "import asyncio\n",
-    "import logging\n",
-    "from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse\n",
-    "\n",
-    "from langfuse.api import Observation, TraceWithFullDetails\n",
-    "from langfuse.decorators import langfuse_context, observe\n",
-    "from langfuse.utils.langfuse_singleton import LangfuseSingleton"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "# just adding it to the namespace\n",
-    "observe = observe"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "logger = logging.getLogger(__name__)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "class LangfuseTrace:\n",
-    "    def __init__(self, trace: TraceWithFullDetails):\n",
-    "        self.trace = trace\n",
-    "        self._langfuse_client = langfuse_context.client_instance\n",
-    "\n",
-    "    def get_url(self):\n",
-    "        return langfuse_context.get_current_trace_url()\n",
-    "\n",
-    "    def filter(self, span_name: str) -> t.List[Observation]:\n",
-    "        trace = self._langfuse_client.fetch_trace(self.trace.id)\n",
-    "        return [span for span in trace.data.observations if span.name == span_name]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "async def sync_trace(\n",
-    "    trace_id: t.Optional[str] = None, max_retries: int = 10, delay: float = 2\n",
-    ") -> LangfuseTrace:\n",
-    "    \"\"\"Wait for a Langfuse trace to be synced to the server.\n",
-    "\n",
-    "    Args:\n",
-    "        trace_id: The ID of the trace to wait for\n",
-    "        max_retries: Maximum number of retry attempts (default: 10)\n",
-    "        delay: Delay between retries in seconds (default: 0.5)\n",
-    "\n",
-    "    Returns:\n",
-    "        Trace object if found, None if not found after retries\n",
-    "    \"\"\"\n",
-    "    if trace_id is None:\n",
-    "        # if no trace id is provided, get the current trace id\n",
-    "        trace_id = langfuse_context.get_current_trace_id()\n",
-    "\n",
-    "    if not trace_id:\n",
-    "        raise ValueError(\n",
-    "            \"No trace id found. Please ensure you are running this function within a function decorated with @observe().\"\n",
-    "        )\n",
-    "    for _ in range(max_retries):\n",
-    "        langfuse_client = LangfuseSingleton().get()\n",
-    "        try:\n",
-    "            # you can also use the async api - langfuse_client.async_api.trace.get(trace_id)\n",
-    "            # .client might be deprecated in the future (didn't change it for superme)\n",
-    "            trace = langfuse_client.client.trace.get(trace_id)\n",
-    "            if trace:\n",
-    "                return LangfuseTrace(trace=trace)\n",
-    "        except Exception as e:\n",
-    "            logger.debug(f\"Trace {trace_id} not yet synced: {str(e)}\")\n",
-    "\n",
-    "        await asyncio.sleep(delay)\n",
-    "\n",
-    "    raise ValueError(f\"Trace {trace_id} not found after {max_retries} attempts\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "def add_query_param(url, param_name, param_value):\n",
-    "    \"\"\"Add a query parameter to a URL.\"\"\"\n",
-    "    # Parse the URL\n",
-    "    url_parts = list(urlparse(url))\n",
-    "\n",
-    "    # Get query params as a dict and add new param\n",
-    "    query_dict = dict(parse_qsl(url_parts[4]))\n",
-    "    query_dict[param_name] = param_value\n",
-    "\n",
-    "    # Replace the query part with updated params\n",
-    "    url_parts[4] = urlencode(query_dict)\n",
-    "\n",
-    "    # Reconstruct the URL\n",
-    "    return urlunparse(url_parts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "https://example.com/path?existing=value&new_param=new_value\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Example usage\n",
-    "url = \"https://example.com/path?existing=value\"\n",
-    "new_url = add_query_param(url, \"new_param\", \"new_value\")\n",
-    "print(new_url)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/tracing/mlflow.ipynb b/experimental/old_nbs/api/tracing/mlflow.ipynb
deleted file mode 100644
index 2618e17b1..000000000
--- a/experimental/old_nbs/api/tracing/mlflow.ipynb
+++ /dev/null
@@ -1,96 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# MLflow tracing\n",
-    "> tracing using mlflow"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp tracing.mlflow"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import os\n",
-    "import typing as t\n",
-    "from mlflow.entities.trace import Trace\n",
-    "from mlflow.entities.span import Span\n",
-    "from mlflow import get_last_active_trace\n",
-    "\n",
-    "class MLflowTrace:\n",
-    "    \n",
-    "    def __init__(self,trace:Trace):\n",
-    "        self.trace = trace\n",
-    "        \n",
-    "    def get_url(self) -> str:\n",
-    "        \n",
-    "        server_url = os.getenv(\"MLFLOW_HOST\")\n",
-    "        if not server_url:\n",
-    "            raise ValueError(\"MLFLOW_HOST environment variable is not set.\")\n",
-    "        trace_info = self.trace.info\n",
-    "        server_url = server_url.rstrip('/')\n",
-    "        request_id = trace_info.request_id\n",
-    "        experiment_id = trace_info.experiment_id\n",
-    "        \n",
-    "        # Build the trace URL\n",
-    "        trace_url = (f\"{server_url}/#/experiments/{experiment_id}?\"\n",
-    "                    f\"compareRunsMode=TRACES&\"\n",
-    "                    f\"selectedTraceId={request_id}\")\n",
-    "        \n",
-    "        return trace_url\n",
-    "        \n",
-    "    def get_filter(self, span_name) -> t.List[Span]:\n",
-    "        \n",
-    "        return self.trace.search_spans(name=span_name)    \n",
-    "        "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "async def sync_trace():\n",
-    "    \n",
-    "    trace = get_last_active_trace()\n",
-    "    if trace is None:\n",
-    "        raise ValueError(\"No active trace found.\")\n",
-    "    \n",
-    "    return MLflowTrace(trace)\n",
-    "    "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/api/typing.ipynb b/experimental/old_nbs/api/typing.ipynb
deleted file mode 100644
index 88f9d5848..000000000
--- a/experimental/old_nbs/api/typing.ipynb
+++ /dev/null
@@ -1,671 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp typing"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Typing Module\n",
-    "\n",
-    "> Field Metadata for python's `t.Annotate`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import typing as t\n",
-    "from enum import Enum\n",
-    "import inspect\n",
-    "from datetime import datetime, date\n",
-    "\n",
-    "from pydantic import BaseModel, create_model\n",
-    "from fastcore.utils import patch\n",
-    "\n",
-    "from ragas_experimental.metric.result import MetricResult"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Supported Types\n",
-    "\n",
-    "Here we have the supported types and meta-types."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "source": "# | export\nimport typing as t\n\n# Define supported backends\nSUPPORTED_BACKENDS = t.Literal[\"ragas_app\", \"local\"]",
-   "metadata": {},
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "The API supports the following column types:\n",
-    "\n",
-    "- `number`: Numeric values\n",
-    "- `longText`: Text content\n",
-    "- `select`: Single selection from predefined options\n",
-    "- `date`: Date values\n",
-    "- `multiSelect`: Multiple selections from predefined options\n",
-    "- `checkbox`: Boolean values\n",
-    "- `url`: url fields\n",
-    "- `custom`: Custom column types with specific behavior\n",
-    "\n",
-    "Each column type has specific settings that can be configured through the `settings` object."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class ColumnType(str, Enum):\n",
-    "    \"\"\"Column types supported by the Ragas API.\"\"\"\n",
-    "    NUMBER = \"number\"\n",
-    "    TEXT = \"longText\"\n",
-    "    SELECT = \"select\"\n",
-    "    MULTI_SELECT = \"multiSelect\"\n",
-    "    CHECKBOX = \"checkbox\"\n",
-    "    DATE = \"date\"\n",
-    "    URL = \"url\"\n",
-    "    CUSTOM = \"custom\"\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "DEFAULT_COLUMN_SETTINGS = {\n",
-    "    \"width\": 255,\n",
-    "    \"isVisible\": True,\n",
-    "    \"isEditable\": True,\n",
-    "}\n",
-    "\n",
-    "class FieldMeta:\n",
-    "    \"\"\"Base metadata for field type annotations.\"\"\"\n",
-    "    def __init__(self, type, required=True, id: t.Optional[str]=None, settings: t.Optional[dict]=None):\n",
-    "        self.type = type\n",
-    "        self.required = required\n",
-    "        self.id = id\n",
-    "        self.settings = DEFAULT_COLUMN_SETTINGS.copy()\n",
-    "        # if settings is provided, update the settings\n",
-    "        if settings:\n",
-    "            self.settings.update(settings)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class Number(FieldMeta):\n",
-    "    \"\"\"Number field metadata.\"\"\"\n",
-    "    def __init__(self, min_value: t.Optional[float] = None, max_value: t.Optional[float] = None, required: bool = True, id: t.Optional[str]=None):\n",
-    "        settings = {}\n",
-    "        if min_value is not None or max_value is not None:\n",
-    "            settings[\"range\"] = {}\n",
-    "            if min_value is not None:\n",
-    "                settings[\"range\"][\"min\"] = min_value\n",
-    "            if max_value is not None:\n",
-    "                settings[\"range\"][\"max\"] = max_value\n",
-    "        super().__init__(ColumnType.NUMBER, required, id, settings=settings)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class Text(FieldMeta):\n",
-    "    \"\"\"Text field metadata.\"\"\"\n",
-    "    def __init__(self, max_length: int = 1000, required: bool = True, id: t.Optional[str]=None):\n",
-    "        settings = {}\n",
-    "        if max_length is not None:\n",
-    "            settings[\"max_length\"] = max_length\n",
-    "        super().__init__(ColumnType.TEXT, required, id, settings=settings)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "class Url(FieldMeta):\n",
-    "    \"\"\"Url field metadata.\"\"\"\n",
-    "    def __init__(self, required: bool = True, id: t.Optional[str]=None):\n",
-    "        settings = {}\n",
-    "        super().__init__(ColumnType.URL, required, id, settings=settings)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "# dict of possible colors for select fields\n",
-    "COLOR_MAP = {\n",
-    "    \"red\": \"hsl(0, 85%, 60%)\",\n",
-    "    \"orange\": \"hsl(30, 85%, 60%)\",\n",
-    "    \"amber\": \"hsl(45, 85%, 60%)\",\n",
-    "    \"yellow\": \"hsl(60, 85%, 60%)\",\n",
-    "    \"lime\": \"hsl(90, 85%, 60%)\",\n",
-    "    \"green\": \"hsl(120, 85%, 60%)\",\n",
-    "    \"teal\": \"hsl(160, 85%, 60%)\",\n",
-    "    \"cyan\": \"hsl(180, 85%, 60%)\",\n",
-    "    \"sky\": \"hsl(200, 85%, 60%)\",\n",
-    "    \"blue\": \"hsl(210, 85%, 60%)\",\n",
-    "    \"indigo\": \"hsl(240, 85%, 60%)\",\n",
-    "    \"violet\": \"hsl(270, 85%, 60%)\",\n",
-    "    \"purple\": \"hsl(280, 85%, 60%)\",\n",
-    "    \"fuchsia\": \"hsl(300, 85%, 60%)\",\n",
-    "    \"pink\": \"hsl(330, 85%, 60%)\"\n",
-    "}\n",
-    "\n",
-    "def get_colors_for_options(options, color_names=None):\n",
-    "    \"\"\"\n",
-    "    Assign colors to options from the COLOR_MAP.\n",
-    "    \n",
-    "    Args:\n",
-    "        options: List of option names\n",
-    "        color_names: Optional list of specific color names to use from COLOR_MAP\n",
-    "                    If None, colors will be assigned in order from COLOR_MAP\n",
-    "    \n",
-    "    Returns:\n",
-    "        List of option objects with name, value, and color properties\n",
-    "    \"\"\"\n",
-    "    if color_names is None:\n",
-    "        # Use colors in order from COLOR_MAP (cycling if needed)\n",
-    "        available_colors = list(COLOR_MAP.values())\n",
-    "        color_values = [available_colors[i % len(available_colors)] for i in range(len(options))]\n",
-    "    else:\n",
-    "        # Use specified colors\n",
-    "        color_values = [COLOR_MAP.get(color, COLOR_MAP[\"blue\"]) for color in color_names]\n",
-    "        # If fewer colors than options, cycle the colors\n",
-    "        if len(color_values) < len(options):\n",
-    "            color_values = [color_values[i % len(color_values)] for i in range(len(options))]\n",
-    "    \n",
-    "    return [{\"name\": option, \"value\": option, \"color\": color_values[i]} \n",
-    "            for i, option in enumerate(options)]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class Select(FieldMeta):\n",
-    "    \"\"\"Select field metadata.\"\"\"\n",
-    "    def __init__(\n",
-    "            self,\n",
-    "            options: t.Optional[t.List[str]] = None,\n",
-    "            required: bool = True,\n",
-    "            colors: t.Optional[t.List[str]] = None\n",
-    "        ):\n",
-    "        settings = {}\n",
-    "        \n",
-    "        # store the colors for later use when combining with Literal types\n",
-    "        self.colors = colors\n",
-    "\n",
-    "        if options:\n",
-    "            if colors:\n",
-    "                settings[\"options\"] = get_colors_for_options(options, colors)\n",
-    "            else:\n",
-    "                settings[\"options\"] = get_colors_for_options(options)\n",
-    "        super().__init__(ColumnType.SELECT, required, settings=settings)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class MultiSelect(FieldMeta):\n",
-    "    \"\"\"MultiSelect field metadata.\"\"\"\n",
-    "    def __init__(self, options: t.Optional[t.List[str]] = None, required: bool = True):\n",
-    "        settings = {}\n",
-    "        if options:\n",
-    "            settings[\"options\"] = [{\"name\": option} for option in options]\n",
-    "        super().__init__(ColumnType.MULTI_SELECT, required, settings=settings)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class Checkbox(FieldMeta):\n",
-    "    \"\"\"Checkbox field metadata.\"\"\"\n",
-    "    def __init__(self, required: bool = True):\n",
-    "        super().__init__(ColumnType.CHECKBOX, required)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "class Date(FieldMeta):\n",
-    "    \"\"\"Date field metadata.\"\"\"\n",
-    "    def __init__(self, include_time: bool = False, required: bool = True):\n",
-    "        settings = {}\n",
-    "        if include_time:\n",
-    "            settings[\"include_time\"] = include_time\n",
-    "        super().__init__(ColumnType.DATE, required, settings=settings)\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "#| export\n",
-    "class Custom(FieldMeta):\n",
-    "    \"\"\"Custom field metadata.\"\"\"\n",
-    "    def __init__(self, custom_type: str = \"\", required: bool = True):\n",
-    "        settings = {}\n",
-    "        if custom_type:\n",
-    "            settings[\"type\"] = custom_type\n",
-    "        super().__init__(ColumnType.CUSTOM, required, settings=settings)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## ModelConverter"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "\n",
-    "#| export\n",
-    "class ModelConverter:\n",
-    "    \"\"\"Convert Pydantic models to Ragas API columns and rows.\"\"\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "def infer_metric_result_type(field_value):\n",
-    "    \"\"\"Infer field type from a MetricResult instance.\"\"\"\n",
-    "    if field_value is None:\n",
-    "        return Text()\n",
-    "    \n",
-    "    # Infer type based on the _result type\n",
-    "    result_value = field_value._result\n",
-    "    \n",
-    "    if isinstance(result_value, (int, float)):\n",
-    "        return Number()\n",
-    "    elif isinstance(result_value, bool):\n",
-    "        return Checkbox()\n",
-    "    elif isinstance(result_value, (list, tuple)):\n",
-    "        # For ranking metrics that return lists\n",
-    "        return Text()\n",
-    "    else:\n",
-    "        # Default to Text for string or other types\n",
-    "        return Text()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "def infer_field_type(annotation, field_info):\n",
-    "    \"\"\"Infer field type from Python type annotation.\"\"\"\n",
-    "    # Check for Annotated with our custom metadata\n",
-    "    origin = t.get_origin(annotation)\n",
-    "    args = t.get_args(annotation)\n",
-    "    \n",
-    "    # Check if this is a MetricResult type\n",
-    "    if annotation is MetricResult or (hasattr(annotation, \"__origin__\") and annotation.__origin__ is MetricResult):\n",
-    "        # Default to Text since we can't determine the result type statically\n",
-    "        return Text()\n",
-    "    \n",
-    "    # If this is an Annotated field then it will have metadata\n",
-    "    if field_info.metadata:\n",
-    "        # Check if we have Select field metadata and base type is Literal\n",
-    "        field_meta = None\n",
-    "        for arg in field_info.metadata:\n",
-    "            if isinstance(arg, FieldMeta):\n",
-    "                field_meta = arg\n",
-    "                break\n",
-    "                \n",
-    "        if field_meta is not None:\n",
-    "            # if it's a URL field, return it\n",
-    "            if isinstance(field_meta, Url):\n",
-    "                return field_meta\n",
-    "\n",
-    "            if isinstance(field_meta, Select) and origin is t.Literal:\n",
-    "                # Special handling for Literal types with Select metadata\n",
-    "                literal_values = list(args)\n",
-    "                \n",
-    "                # If Select has colors but no options, use the literal values as options\n",
-    "                if not field_meta.settings.get(\"options\") and \"colors\" in field_meta.__dict__:\n",
-    "                    colors = field_meta.__dict__[\"colors\"]\n",
-    "                    return Select(options=literal_values, colors=colors)\n",
-    "                \n",
-    "                # If no colors specified, just use literal values as options\n",
-    "                if not field_meta.settings.get(\"options\"):\n",
-    "                    return Select(options=literal_values)\n",
-    "            \n",
-    "            # for any other field metadata, just return the field metadata\n",
-    "            return field_meta\n",
-    "\n",
-    "        # If no field metadata found, infer from the base type\n",
-    "        return infer_field_type(args[0], field_info)\n",
-    "    \n",
-    "    # Handle Optional, List, etc.\n",
-    "    if origin is t.Union:\n",
-    "        if type(None) in args:\n",
-    "            # This is Optional[T]\n",
-    "            non_none_args = [arg for arg in args if arg is not type(None)]\n",
-    "            if len(non_none_args) == 1:\n",
-    "                # Get the field type of the non-None arg\n",
-    "                field_meta = infer_field_type(non_none_args[0], field_info)\n",
-    "                field_meta.required = False\n",
-    "                return field_meta\n",
-    "    \n",
-    "    # Handle List and array types\n",
-    "    # NOTE: here we are converting lists to strings, except for literal types\n",
-    "    if origin is list or origin is t.List:\n",
-    "        if len(args) > 0:\n",
-    "            # Check if it's a list of literals\n",
-    "            if t.get_origin(args[0]) is t.Literal:\n",
-    "                literal_options = t.get_args(args[0])\n",
-    "                return MultiSelect(options=list(literal_options))\n",
-    "            # Otherwise just a regular list\n",
-    "            return Text()  # Default to Text for lists\n",
-    "    \n",
-    "    # Handle Literal\n",
-    "    if origin is t.Literal:\n",
-    "        return Select(options=list(args))\n",
-    "    \n",
-    "    # Basic type handling\n",
-    "    if annotation is str:\n",
-    "        return Text()\n",
-    "    elif annotation is int or annotation is float:\n",
-    "        return Number()\n",
-    "    elif annotation is bool:\n",
-    "        return Checkbox()\n",
-    "    elif annotation is datetime or annotation is date:\n",
-    "        return Date(include_time=annotation is datetime)\n",
-    "    \n",
-    "    # Default to Text for complex or unknown types\n",
-    "    return Text()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch(cls_method=True)\n",
-    "def model_to_columns(cls: ModelConverter, model_class):\n",
-    "    \"\"\"Convert a Pydantic model class to Ragas API column definitions.\"\"\"\n",
-    "    columns = []\n",
-    "    for field_name, field_info in model_class.model_fields.items():\n",
-    "        # Get the field's type annotation\n",
-    "        annotation = field_info.annotation\n",
-    "        \n",
-    "        # Special handling for MetricResult fields\n",
-    "        if (annotation is MetricResult or \n",
-    "            (hasattr(annotation, \"__origin__\") and annotation.__origin__ is MetricResult) or\n",
-    "            (hasattr(field_info, \"annotation\") and str(field_info.annotation).find(\"MetricResult\") != -1)):\n",
-    "            \n",
-    "            # Create column for the result value\n",
-    "            field_meta = infer_field_type(annotation, field_info)\n",
-    "            column = {\n",
-    "                \"id\": field_name,\n",
-    "                \"name\": field_name,\n",
-    "                \"type\": field_meta.type.value,\n",
-    "                \"settings\": field_meta.settings.copy(),\n",
-    "            }\n",
-    "            columns.append(column)\n",
-    "            \n",
-    "            # Create additional column for the reason\n",
-    "            reason_column = {\n",
-    "                \"id\": f\"{field_name}_reason\",\n",
-    "                \"name\": f\"{field_name}_reason\",\n",
-    "                \"type\": ColumnType.TEXT.value,\n",
-    "                \"settings\": Text().settings.copy(),\n",
-    "                \"editable\": True\n",
-    "            }\n",
-    "            columns.append(reason_column)\n",
-    "        else:\n",
-    "            # Regular field handling\n",
-    "            field_meta = infer_field_type(annotation, field_info)\n",
-    "            \n",
-    "            column = {\n",
-    "                \"id\": field_name,\n",
-    "                \"name\": field_name,\n",
-    "                \"type\": field_meta.type.value,\n",
-    "                \"settings\": field_meta.settings,\n",
-    "            }\n",
-    "            \n",
-    "            columns.append(column)\n",
-    "    \n",
-    "    # set the position of the columns\n",
-    "    for i in range(len(columns)):\n",
-    "        columns[i][\"settings\"][\"position\"] = i\n",
-    "    return columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "class TestModel(BaseModel):\n",
-    "    tags: t.Literal[\"test\", \"test2\"]\n",
-    "    tags_with_colors: t.Annotated[t.Literal[\"test\", \"test2\"], Select(colors=[\"red\", \"blue\"])]\n",
-    "    url: t.Annotated[str, Url()]\n",
-    "    score: MetricResult"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "[{'id': 'tags',\n",
-       "  'name': 'tags',\n",
-       "  'type': 'select',\n",
-       "  'settings': {'width': 255,\n",
-       "   'isVisible': True,\n",
-       "   'isEditable': True,\n",
-       "   'options': [{'name': 'test', 'value': 'test', 'color': 'hsl(0, 85%, 60%)'},\n",
-       "    {'name': 'test2', 'value': 'test2', 'color': 'hsl(30, 85%, 60%)'}],\n",
-       "   'position': 0}},\n",
-       " {'id': 'tags_with_colors',\n",
-       "  'name': 'tags_with_colors',\n",
-       "  'type': 'select',\n",
-       "  'settings': {'width': 255,\n",
-       "   'isVisible': True,\n",
-       "   'isEditable': True,\n",
-       "   'options': [{'name': 'test', 'value': 'test', 'color': 'hsl(0, 85%, 60%)'},\n",
-       "    {'name': 'test2', 'value': 'test2', 'color': 'hsl(210, 85%, 60%)'}],\n",
-       "   'position': 1}},\n",
-       " {'id': 'url',\n",
-       "  'name': 'url',\n",
-       "  'type': 'url',\n",
-       "  'settings': {'width': 255,\n",
-       "   'isVisible': True,\n",
-       "   'isEditable': True,\n",
-       "   'position': 2}},\n",
-       " {'id': 'score',\n",
-       "  'name': 'score',\n",
-       "  'type': 'longText',\n",
-       "  'settings': {'width': 255,\n",
-       "   'isVisible': True,\n",
-       "   'isEditable': True,\n",
-       "   'max_length': 1000,\n",
-       "   'position': 3}},\n",
-       " {'id': 'score_reason',\n",
-       "  'name': 'score_reason',\n",
-       "  'type': 'longText',\n",
-       "  'settings': {'width': 255,\n",
-       "   'isVisible': True,\n",
-       "   'isEditable': True,\n",
-       "   'max_length': 1000,\n",
-       "   'position': 4},\n",
-       "  'editable': True}]"
-      ]
-     },
-     "execution_count": null,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "ModelConverter.model_to_columns(TestModel)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export \n",
-    "@patch(cls_method=True)\n",
-    "def instance_to_row(cls: ModelConverter, instance, model_class=None):\n",
-    "    \"\"\"Convert a Pydantic model instance to a Ragas API row.\"\"\"\n",
-    "    if model_class is None:\n",
-    "        model_class = instance.__class__\n",
-    "    \n",
-    "    row_cells = []\n",
-    "    model_data = instance.model_dump()\n",
-    "    \n",
-    "    for field_name, field_info in model_class.model_fields.items():\n",
-    "        if field_name in model_data:\n",
-    "            value = model_data[field_name]\n",
-    "            # Get the field's type annotation\n",
-    "            annotation = field_info.annotation\n",
-    "            \n",
-    "            # Special handling for MetricResult fields\n",
-    "            if isinstance(value, MetricResult):\n",
-    "                # Process the result value\n",
-    "                field_meta = infer_metric_result_type(value)\n",
-    "                processed_value = value._result\n",
-    "                \n",
-    "                # Add result cell\n",
-    "                row_cells.append({\n",
-    "                    \"column_id\": field_name,\n",
-    "                    \"data\": processed_value\n",
-    "                })\n",
-    "                \n",
-    "                # Add reason cell\n",
-    "                row_cells.append({\n",
-    "                    \"column_id\": f\"{field_name}_reason\",\n",
-    "                    \"data\": value.reason\n",
-    "                })\n",
-    "            else:\n",
-    "                # Regular field handling\n",
-    "                field_meta = infer_field_type(annotation, field_info)\n",
-    "                \n",
-    "                # Special handling for various types\n",
-    "                if field_meta.type == ColumnType.MULTI_SELECT and isinstance(value, list):\n",
-    "                    # Convert list to string format accepted by API\n",
-    "                    processed_value = value\n",
-    "                elif field_meta.type == ColumnType.DATE and isinstance(value, (datetime, date)):\n",
-    "                    # Format date as string\n",
-    "                    processed_value = value.isoformat()\n",
-    "                else:\n",
-    "                    processed_value = value\n",
-    "                \n",
-    "                row_cells.append({\n",
-    "                    \"column_id\": field_name,\n",
-    "                    \"data\": processed_value\n",
-    "                })\n",
-    "    \n",
-    "    return {\n",
-    "        \"data\": row_cells\n",
-    "    }"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "@patch(cls_method=True)\n",
-    "def instances_to_rows(cls: ModelConverter, instances, model_class=None):\n",
-    "    \"\"\"Convert multiple Pydantic model instances to Ragas API rows.\"\"\"\n",
-    "    if not instances:\n",
-    "        return []\n",
-    "    \n",
-    "    if model_class is None and instances:\n",
-    "        model_class = instances[0].__class__\n",
-    "    \n",
-    "    return [cls.instance_to_row(instance, model_class) for instance in instances]"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
\ No newline at end of file
diff --git a/experimental/old_nbs/api/utils.ipynb b/experimental/old_nbs/api/utils.ipynb
deleted file mode 100644
index 87fab87fc..000000000
--- a/experimental/old_nbs/api/utils.ipynb
+++ /dev/null
@@ -1,1304 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| default_exp utils"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Utils"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import string\n",
-    "import uuid\n",
-    "import functools\n",
-    "import asyncio"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "def create_nano_id(size=12):\n",
-    "    # Define characters to use (alphanumeric)\n",
-    "    alphabet = string.ascii_letters + string.digits\n",
-    "    \n",
-    "    # Generate UUID and convert to int\n",
-    "    uuid_int = uuid.uuid4().int\n",
-    "    \n",
-    "    # Convert to base62\n",
-    "    result = \"\"\n",
-    "    while uuid_int:\n",
-    "        uuid_int, remainder = divmod(uuid_int, len(alphabet))\n",
-    "        result = alphabet[remainder] + result\n",
-    "    \n",
-    "    # Pad if necessary and return desired length\n",
-    "    return result[:size]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "def async_to_sync(async_func):\n",
-    "    \"\"\"Convert an async function to a sync function\"\"\"\n",
-    "    @functools.wraps(async_func)\n",
-    "    def sync_wrapper(*args, **kwargs):\n",
-    "        try:\n",
-    "            loop = asyncio.get_event_loop()\n",
-    "            if loop.is_running():\n",
-    "                import concurrent.futures\n",
-    "                with concurrent.futures.ThreadPoolExecutor() as executor:\n",
-    "                    future = executor.submit(asyncio.run, async_func(*args, **kwargs))\n",
-    "                    return future.result()\n",
-    "            else:\n",
-    "                return loop.run_until_complete(async_func(*args, **kwargs))\n",
-    "        except RuntimeError:\n",
-    "            return asyncio.run(async_func(*args, **kwargs))\n",
-    "    return sync_wrapper"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "\n",
-    "import numpy as np\n",
-    "import plotly.graph_objects as go\n",
-    "from plotly.subplots import make_subplots\n",
-    "from collections import Counter\n",
-    "\n",
-    "def plot_experiments_as_subplots(data, experiment_names=None):\n",
-    "    \"\"\"\n",
-    "    Plot metrics comparison across experiments.\n",
-    "    \n",
-    "    Parameters:\n",
-    "    - data: Dictionary with experiment_names as keys and metrics as nested dictionaries\n",
-    "    - experiment_names: List of experiment IDs in the order they should be plotted\n",
-    "    \n",
-    "    Returns:\n",
-    "    - Plotly figure object with horizontal subplots\n",
-    "    \"\"\"\n",
-    "    if experiment_names is None:\n",
-    "        experiment_names = list(data.keys())\n",
-    "    \n",
-    "    exp_short_names = [f\"{name[:10]}..\"for name in experiment_names]\n",
-    "    #TODO: need better solution to identify what type of metric it is\n",
-    "    # this is a temporary solution\n",
-    "    # Identify metrics and their types\n",
-    "    metrics = {}\n",
-    "    for exp_id in experiment_names:\n",
-    "        for metric_name, values in data[exp_id].items():\n",
-    "            # Classify metric type (discrete or numerical)\n",
-    "            if metric_name not in metrics:\n",
-    "                # Check first value to determine type\n",
-    "                is_discrete = isinstance(values[0], str)\n",
-    "                metrics[metric_name] = {\"type\": \"discrete\" if is_discrete else \"numerical\"}\n",
-    "    \n",
-    "    # Create horizontal subplots (one for each metric)\n",
-    "    fig = make_subplots(\n",
-    "        rows=1, \n",
-    "        cols=len(metrics),\n",
-    "        subplot_titles=[f\"{metric.capitalize()} Comparison\" for metric in metrics.keys()],\n",
-    "        horizontal_spacing=0.1\n",
-    "    )\n",
-    "    \n",
-    "    # Process metrics and add traces\n",
-    "    col_idx = 1\n",
-    "    for metric_name, metric_info in metrics.items():\n",
-    "        if metric_info[\"type\"] == \"discrete\":\n",
-    "            # For discrete metrics (like pass/fail)\n",
-    "            categories = set()\n",
-    "            for exp_id in experiment_names:\n",
-    "                count = Counter(data[exp_id][metric_name])\n",
-    "                categories.update(count.keys())\n",
-    "            \n",
-    "            categories = sorted(list(categories))\n",
-    "            \n",
-    "            for category in categories:\n",
-    "                y_values = []\n",
-    "                for exp_id in experiment_names:\n",
-    "                    count = Counter(data[exp_id][metric_name])\n",
-    "                    total = sum(count.values())\n",
-    "                    percentage = (count.get(category, 0) / total) * 100\n",
-    "                    y_values.append(percentage)\n",
-    "                \n",
-    "                # Assign colors based on category\n",
-    "                \n",
-    "                # Generate consistent color for other categories\n",
-    "                import hashlib\n",
-    "                hash_obj = hashlib.md5(category.encode())\n",
-    "                hash_hex = hash_obj.hexdigest()\n",
-    "                color = f\"#{hash_hex[:6]}\"\n",
-    "                \n",
-    "                fig.add_trace(\n",
-    "                    go.Bar(\n",
-    "                        x=exp_short_names,\n",
-    "                        y=y_values,\n",
-    "                        name=category.capitalize(),\n",
-    "                        marker_color=color,\n",
-    "                        width=0.5,  # Narrower bars\n",
-    "                        hoverinfo='text',\n",
-    "                        hovertext=[f\"{category.capitalize()}: {x:.1f}%\" for x in y_values],\n",
-    "                        showlegend=False  # Remove legend\n",
-    "                    ),\n",
-    "                    row=1, col=col_idx\n",
-    "                )\n",
-    "            \n",
-    "        else:  # Numerical metrics\n",
-    "            normalized_values = []\n",
-    "            original_values = []\n",
-    "            \n",
-    "            for exp_id in experiment_names:\n",
-    "                values = data[exp_id][metric_name]\n",
-    "                mean_val = np.mean(values)\n",
-    "                original_values.append(mean_val)\n",
-    "                \n",
-    "                # Normalize to 0-100 scale\n",
-    "                min_val = np.min(values)\n",
-    "                max_val = np.max(values)\n",
-    "                normalized = ((mean_val - min_val) / (max_val - min_val)) * 100\n",
-    "                normalized_values.append(normalized)\n",
-    "            \n",
-    "            # Add bar chart for numerical data\n",
-    "            fig.add_trace(\n",
-    "                go.Bar(\n",
-    "                    x=exp_short_names,\n",
-    "                    y=normalized_values,\n",
-    "                    name=metric_name.capitalize(),\n",
-    "                    marker_color='#2E8B57',  # Sea green\n",
-    "                    width=0.5,  # Narrower bars\n",
-    "                    hoverinfo='text',\n",
-    "                    hovertext=[f\"{metric_name.capitalize()} Mean: {val:.2f} (Normalized: {norm:.1f}%)\" \n",
-    "                              for val, norm in zip(original_values, normalized_values)],\n",
-    "                    showlegend=False  # Remove legend\n",
-    "                ),\n",
-    "                row=1, col=col_idx\n",
-    "            )\n",
-    "        \n",
-    "        # Update axes for each subplot\n",
-    "        fig.update_yaxes(\n",
-    "            title_text=\"Percentage (%)\" if metric_info[\"type\"] == \"discrete\" else \"Normalized Value\",\n",
-    "            range=[0, 105],  # Leave room for labels at the top\n",
-    "            ticksuffix=\"%\",\n",
-    "            showgrid=True,\n",
-    "            gridcolor='lightgray',\n",
-    "            showline=True,\n",
-    "            linewidth=1,\n",
-    "            linecolor='black',\n",
-    "            row=1, col=col_idx\n",
-    "        )\n",
-    "        \n",
-    "        fig.update_xaxes(\n",
-    "            title_text=\"Experiments\",\n",
-    "            tickangle=-45,\n",
-    "            showgrid=False,\n",
-    "            showline=True,\n",
-    "            linewidth=1,\n",
-    "            linecolor='black',\n",
-    "            row=1, col=col_idx\n",
-    "        )\n",
-    "        \n",
-    "        col_idx += 1\n",
-    "    \n",
-    "    # Update layout for the entire figure\n",
-    "    fig.update_layout(\n",
-    "        title='Experiment Comparison by Metrics',\n",
-    "        barmode='stack' if any(metric_info[\"type\"] == \"discrete\" for metric_info in metrics.values()) else 'group',\n",
-    "        height=400,  # Reduced height\n",
-    "        width=250 * len(metrics) + 150,  # Adjust width based on number of metrics\n",
-    "        showlegend=False,  # Remove legend\n",
-    "        margin=dict(t=80, b=50, l=50, r=50),\n",
-    "        plot_bgcolor='white',\n",
-    "        hovermode='closest'\n",
-    "    )\n",
-    "    \n",
-    "    return fig\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.plotly.v1+json": {
-       "config": {
-        "plotlyServerURL": "https://plot.ly"
-       },
-       "data": [
-        {
-         "hoverinfo": "text",
-         "hovertext": [
-          "Fail: 50.0%",
-          "Fail: 33.3%"
-         ],
-         "marker": {
-          "color": "#e11185"
-         },
-         "name": "Fail",
-         "showlegend": false,
-         "type": "bar",
-         "width": 0.5,
-         "x": [
-          "my-first-e..",
-          "my-second-.."
-         ],
-         "xaxis": "x",
-         "y": [
-          50,
-          33.33333333333333
-         ],
-         "yaxis": "y"
-        },
-        {
-         "hoverinfo": "text",
-         "hovertext": [
-          "Pass: 50.0%",
-          "Pass: 66.7%"
-         ],
-         "marker": {
-          "color": "#1a1dc9"
-         },
-         "name": "Pass",
-         "showlegend": false,
-         "type": "bar",
-         "width": 0.5,
-         "x": [
-          "my-first-e..",
-          "my-second-.."
-         ],
-         "xaxis": "x",
-         "y": [
-          50,
-          66.66666666666666
-         ],
-         "yaxis": "y"
-        },
-        {
-         "hoverinfo": "text",
-         "hovertext": [
-          "Positivity Mean: 5.67 (Normalized: 51.9%)",
-          "Positivity Mean: 6.23 (Normalized: 52.9%)"
-         ],
-         "marker": {
-          "color": "#2E8B57"
-         },
-         "name": "Positivity",
-         "showlegend": false,
-         "type": "bar",
-         "width": 0.5,
-         "x": [
-          "my-first-e..",
-          "my-second-.."
-         ],
-         "xaxis": "x2",
-         "y": [
-          51.85185185185186,
-          52.916666666666664
-         ],
-         "yaxis": "y2"
-        }
-       ],
-       "layout": {
-        "annotations": [
-         {
-          "font": {
-           "size": 16
-          },
-          "showarrow": false,
-          "text": "Correctness Comparison",
-          "x": 0.225,
-          "xanchor": "center",
-          "xref": "paper",
-          "y": 1,
-          "yanchor": "bottom",
-          "yref": "paper"
-         },
-         {
-          "font": {
-           "size": 16
-          },
-          "showarrow": false,
-          "text": "Positivity Comparison",
-          "x": 0.775,
-          "xanchor": "center",
-          "xref": "paper",
-          "y": 1,
-          "yanchor": "bottom",
-          "yref": "paper"
-         }
-        ],
-        "barmode": "stack",
-        "height": 400,
-        "hovermode": "closest",
-        "margin": {
-         "b": 50,
-         "l": 50,
-         "r": 50,
-         "t": 80
-        },
-        "plot_bgcolor": "white",
-        "showlegend": false,
-        "template": {
-         "data": {
-          "bar": [
-           {
-            "error_x": {
-             "color": "#2a3f5f"
-            },
-            "error_y": {
-             "color": "#2a3f5f"
-            },
-            "marker": {
-             "line": {
-              "color": "#E5ECF6",
-              "width": 0.5
-             },
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "bar"
-           }
-          ],
-          "barpolar": [
-           {
-            "marker": {
-             "line": {
-              "color": "#E5ECF6",
-              "width": 0.5
-             },
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "barpolar"
-           }
-          ],
-          "carpet": [
-           {
-            "aaxis": {
-             "endlinecolor": "#2a3f5f",
-             "gridcolor": "white",
-             "linecolor": "white",
-             "minorgridcolor": "white",
-             "startlinecolor": "#2a3f5f"
-            },
-            "baxis": {
-             "endlinecolor": "#2a3f5f",
-             "gridcolor": "white",
-             "linecolor": "white",
-             "minorgridcolor": "white",
-             "startlinecolor": "#2a3f5f"
-            },
-            "type": "carpet"
-           }
-          ],
-          "choropleth": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "choropleth"
-           }
-          ],
-          "contour": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "contour"
-           }
-          ],
-          "contourcarpet": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "contourcarpet"
-           }
-          ],
-          "heatmap": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "heatmap"
-           }
-          ],
-          "histogram": [
-           {
-            "marker": {
-             "pattern": {
-              "fillmode": "overlay",
-              "size": 10,
-              "solidity": 0.2
-             }
-            },
-            "type": "histogram"
-           }
-          ],
-          "histogram2d": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "histogram2d"
-           }
-          ],
-          "histogram2dcontour": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "histogram2dcontour"
-           }
-          ],
-          "mesh3d": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "type": "mesh3d"
-           }
-          ],
-          "parcoords": [
-           {
-            "line": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "parcoords"
-           }
-          ],
-          "pie": [
-           {
-            "automargin": true,
-            "type": "pie"
-           }
-          ],
-          "scatter": [
-           {
-            "fillpattern": {
-             "fillmode": "overlay",
-             "size": 10,
-             "solidity": 0.2
-            },
-            "type": "scatter"
-           }
-          ],
-          "scatter3d": [
-           {
-            "line": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatter3d"
-           }
-          ],
-          "scattercarpet": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattercarpet"
-           }
-          ],
-          "scattergeo": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattergeo"
-           }
-          ],
-          "scattergl": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattergl"
-           }
-          ],
-          "scattermap": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattermap"
-           }
-          ],
-          "scattermapbox": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scattermapbox"
-           }
-          ],
-          "scatterpolar": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterpolar"
-           }
-          ],
-          "scatterpolargl": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterpolargl"
-           }
-          ],
-          "scatterternary": [
-           {
-            "marker": {
-             "colorbar": {
-              "outlinewidth": 0,
-              "ticks": ""
-             }
-            },
-            "type": "scatterternary"
-           }
-          ],
-          "surface": [
-           {
-            "colorbar": {
-             "outlinewidth": 0,
-             "ticks": ""
-            },
-            "colorscale": [
-             [
-              0,
-              "#0d0887"
-             ],
-             [
-              0.1111111111111111,
-              "#46039f"
-             ],
-             [
-              0.2222222222222222,
-              "#7201a8"
-             ],
-             [
-              0.3333333333333333,
-              "#9c179e"
-             ],
-             [
-              0.4444444444444444,
-              "#bd3786"
-             ],
-             [
-              0.5555555555555556,
-              "#d8576b"
-             ],
-             [
-              0.6666666666666666,
-              "#ed7953"
-             ],
-             [
-              0.7777777777777778,
-              "#fb9f3a"
-             ],
-             [
-              0.8888888888888888,
-              "#fdca26"
-             ],
-             [
-              1,
-              "#f0f921"
-             ]
-            ],
-            "type": "surface"
-           }
-          ],
-          "table": [
-           {
-            "cells": {
-             "fill": {
-              "color": "#EBF0F8"
-             },
-             "line": {
-              "color": "white"
-             }
-            },
-            "header": {
-             "fill": {
-              "color": "#C8D4E3"
-             },
-             "line": {
-              "color": "white"
-             }
-            },
-            "type": "table"
-           }
-          ]
-         },
-         "layout": {
-          "annotationdefaults": {
-           "arrowcolor": "#2a3f5f",
-           "arrowhead": 0,
-           "arrowwidth": 1
-          },
-          "autotypenumbers": "strict",
-          "coloraxis": {
-           "colorbar": {
-            "outlinewidth": 0,
-            "ticks": ""
-           }
-          },
-          "colorscale": {
-           "diverging": [
-            [
-             0,
-             "#8e0152"
-            ],
-            [
-             0.1,
-             "#c51b7d"
-            ],
-            [
-             0.2,
-             "#de77ae"
-            ],
-            [
-             0.3,
-             "#f1b6da"
-            ],
-            [
-             0.4,
-             "#fde0ef"
-            ],
-            [
-             0.5,
-             "#f7f7f7"
-            ],
-            [
-             0.6,
-             "#e6f5d0"
-            ],
-            [
-             0.7,
-             "#b8e186"
-            ],
-            [
-             0.8,
-             "#7fbc41"
-            ],
-            [
-             0.9,
-             "#4d9221"
-            ],
-            [
-             1,
-             "#276419"
-            ]
-           ],
-           "sequential": [
-            [
-             0,
-             "#0d0887"
-            ],
-            [
-             0.1111111111111111,
-             "#46039f"
-            ],
-            [
-             0.2222222222222222,
-             "#7201a8"
-            ],
-            [
-             0.3333333333333333,
-             "#9c179e"
-            ],
-            [
-             0.4444444444444444,
-             "#bd3786"
-            ],
-            [
-             0.5555555555555556,
-             "#d8576b"
-            ],
-            [
-             0.6666666666666666,
-             "#ed7953"
-            ],
-            [
-             0.7777777777777778,
-             "#fb9f3a"
-            ],
-            [
-             0.8888888888888888,
-             "#fdca26"
-            ],
-            [
-             1,
-             "#f0f921"
-            ]
-           ],
-           "sequentialminus": [
-            [
-             0,
-             "#0d0887"
-            ],
-            [
-             0.1111111111111111,
-             "#46039f"
-            ],
-            [
-             0.2222222222222222,
-             "#7201a8"
-            ],
-            [
-             0.3333333333333333,
-             "#9c179e"
-            ],
-            [
-             0.4444444444444444,
-             "#bd3786"
-            ],
-            [
-             0.5555555555555556,
-             "#d8576b"
-            ],
-            [
-             0.6666666666666666,
-             "#ed7953"
-            ],
-            [
-             0.7777777777777778,
-             "#fb9f3a"
-            ],
-            [
-             0.8888888888888888,
-             "#fdca26"
-            ],
-            [
-             1,
-             "#f0f921"
-            ]
-           ]
-          },
-          "colorway": [
-           "#636efa",
-           "#EF553B",
-           "#00cc96",
-           "#ab63fa",
-           "#FFA15A",
-           "#19d3f3",
-           "#FF6692",
-           "#B6E880",
-           "#FF97FF",
-           "#FECB52"
-          ],
-          "font": {
-           "color": "#2a3f5f"
-          },
-          "geo": {
-           "bgcolor": "white",
-           "lakecolor": "white",
-           "landcolor": "#E5ECF6",
-           "showlakes": true,
-           "showland": true,
-           "subunitcolor": "white"
-          },
-          "hoverlabel": {
-           "align": "left"
-          },
-          "hovermode": "closest",
-          "mapbox": {
-           "style": "light"
-          },
-          "paper_bgcolor": "white",
-          "plot_bgcolor": "#E5ECF6",
-          "polar": {
-           "angularaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "bgcolor": "#E5ECF6",
-           "radialaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           }
-          },
-          "scene": {
-           "xaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           },
-           "yaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           },
-           "zaxis": {
-            "backgroundcolor": "#E5ECF6",
-            "gridcolor": "white",
-            "gridwidth": 2,
-            "linecolor": "white",
-            "showbackground": true,
-            "ticks": "",
-            "zerolinecolor": "white"
-           }
-          },
-          "shapedefaults": {
-           "line": {
-            "color": "#2a3f5f"
-           }
-          },
-          "ternary": {
-           "aaxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "baxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           },
-           "bgcolor": "#E5ECF6",
-           "caxis": {
-            "gridcolor": "white",
-            "linecolor": "white",
-            "ticks": ""
-           }
-          },
-          "title": {
-           "x": 0.05
-          },
-          "xaxis": {
-           "automargin": true,
-           "gridcolor": "white",
-           "linecolor": "white",
-           "ticks": "",
-           "title": {
-            "standoff": 15
-           },
-           "zerolinecolor": "white",
-           "zerolinewidth": 2
-          },
-          "yaxis": {
-           "automargin": true,
-           "gridcolor": "white",
-           "linecolor": "white",
-           "ticks": "",
-           "title": {
-            "standoff": 15
-           },
-           "zerolinecolor": "white",
-           "zerolinewidth": 2
-          }
-         }
-        },
-        "title": {
-         "text": "Experiment Comparison by Metrics"
-        },
-        "width": 650,
-        "xaxis": {
-         "anchor": "y",
-         "domain": [
-          0,
-          0.45
-         ],
-         "linecolor": "black",
-         "linewidth": 1,
-         "showgrid": false,
-         "showline": true,
-         "tickangle": -45,
-         "title": {
-          "text": "Experiments"
-         }
-        },
-        "xaxis2": {
-         "anchor": "y2",
-         "domain": [
-          0.55,
-          1
-         ],
-         "linecolor": "black",
-         "linewidth": 1,
-         "showgrid": false,
-         "showline": true,
-         "tickangle": -45,
-         "title": {
-          "text": "Experiments"
-         }
-        },
-        "yaxis": {
-         "anchor": "x",
-         "domain": [
-          0,
-          1
-         ],
-         "gridcolor": "lightgray",
-         "linecolor": "black",
-         "linewidth": 1,
-         "range": [
-          0,
-          105
-         ],
-         "showgrid": true,
-         "showline": true,
-         "ticksuffix": "%",
-         "title": {
-          "text": "Percentage (%)"
-         }
-        },
-        "yaxis2": {
-         "anchor": "x2",
-         "domain": [
-          0,
-          1
-         ],
-         "gridcolor": "lightgray",
-         "linecolor": "black",
-         "linewidth": 1,
-         "range": [
-          0,
-          105
-         ],
-         "showgrid": true,
-         "showline": true,
-         "ticksuffix": "%",
-         "title": {
-          "text": "Normalized Value"
-         }
-        }
-       }
-      }
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "# Provided sample data\n",
-    "data = {\n",
-    "    'my-first-experiment': {\n",
-    "        'correctness': ['pass', 'fail', 'fail', 'fail', 'fail', 'pass', 'fail', \n",
-    "                  'pass', 'fail', 'fail', 'fail', 'pass', 'pass', 'pass', \n",
-    "                  'pass', 'fail', 'pass', 'fail', 'pass', 'pass', 'pass', \n",
-    "                  'fail', 'fail', 'pass', 'pass', 'pass', 'pass', 'fail', \n",
-    "                  'fail', 'fail'],\n",
-    "        'positivity': [\n",
-    "            7, 3, 8, 2, 4, 9, 3, 8, 7, 6, \n",
-    "            9, 7, 8, 10, 1, 8, 9, 4, 8, 1, \n",
-    "            9, 3, 2, 1, 1, 9, 8, 4, 3, 8\n",
-    "        ]\n",
-    "    },\n",
-    "    'my-second-experiment': {\n",
-    "        'correctness': ['pass', 'pass', 'pass', 'fail', 'pass', 'pass', 'pass', \n",
-    "                  'pass', 'fail', 'pass', 'pass', 'pass', 'fail', 'pass', \n",
-    "                  'pass', 'pass', 'pass', 'pass', 'pass', 'pass', 'fail', \n",
-    "                  'pass', 'fail', 'fail', 'pass', 'fail', 'pass', 'fail', \n",
-    "                  'fail', 'fail'],\n",
-    "        'positivity': [\n",
-    "            6, 8, 7, 3, 8, 7, 9, 8, 2, 7, \n",
-    "            6, 8, 4, 9, 8, 7, 10, 9, 8, 9, \n",
-    "            3, 8, 4, 2, 7, 3, 8, 4, 2, 3\n",
-    "        ]\n",
-    "    }\n",
-    "}\n",
-    "\n",
-    "\n",
-    "# Plot the comparison\n",
-    "experiment_names = ['my-first-experiment', 'my-second-experiment',]\n",
-    "fig = plot_experiments_as_subplots(data, experiment_names)\n",
-    "\n",
-    "# Show the figure\n",
-    "fig.show()\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "import tempfile\n",
-    "import os"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| export\n",
-    "# Helper function for tests\n",
-    "def get_test_directory():\n",
-    "    \"\"\"Create a test directory that will be cleaned up on process exit.\n",
-    "    \n",
-    "    Returns:\n",
-    "        str: Path to test directory\n",
-    "    \"\"\"\n",
-    "    # Create a directory in the system temp directory\n",
-    "    test_dir = os.path.join(tempfile.gettempdir(), f\"ragas_test_{create_nano_id()}\")\n",
-    "    os.makedirs(test_dir, exist_ok=True)\n",
-    "    \n",
-    "    return test_dir"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "python3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/explanation/index.ipynb b/experimental/old_nbs/explanation/index.ipynb
deleted file mode 100644
index a4465bc52..000000000
--- a/experimental/old_nbs/explanation/index.ipynb
+++ /dev/null
@@ -1,49 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Explanation\n",
-    "\n",
-    "> Understanding-oriented documentation for Ragas Experimental"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "source": [
-    "#| hide\n",
-    "from ragas_experimental import *"
-   ],
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Welcome to Explanation Section\n",
-    "\n",
-    "The explanation section provides understanding-oriented documentation about Ragas Experimental. Here you'll find in-depth discussions about concepts, architecture decisions, and the theoretical foundation of the library.\n",
-    "\n",
-    "Unlike tutorials or how-to guides, these documents focus on explaining why things work the way they do and the underlying principles.\n",
-    "\n",
-    "### Available Explanations\n",
-    "\n",
-    "- Pydantic Models in Ragas\n",
-    "- Type System Overview\n",
-    "- Prompt Engineering Architecture"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
\ No newline at end of file
diff --git a/experimental/old_nbs/explanation/litellm.ipynb b/experimental/old_nbs/explanation/litellm.ipynb
deleted file mode 100644
index a1fd2fc2c..000000000
--- a/experimental/old_nbs/explanation/litellm.ipynb
+++ /dev/null
@@ -1,958 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ModelResponse(id='chatcmpl-BcLCskMkvDygCWIT9fqZVXhUjEuST', created=1748477682, model='gpt-4o-2024-08-06', object='chat.completion', system_fingerprint='fp_a288987b44', choices=[Choices(finish_reason='stop', index=0, message=Message(content='Hello! I’m just a program, so I don’t have feelings, but I’m here and ready to help you with whatever you need. How can I assist you today?', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'refusal': None}, annotations=[]))], usage=Usage(completion_tokens=36, prompt_tokens=13, total_tokens=49, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None)), service_tier='default')\n"
-     ]
-    }
-   ],
-   "source": [
-    "from litellm import completion\n",
-    "import os\n",
-    "\n",
-    "## set ENV variables\n",
-    "#os.environ[\"OPENAI_API_KEY\"] = \"your-api-key\"\n",
-    "\n",
-    "response = completion(\n",
-    "  model=\"openai/gpt-4o\",\n",
-    "  messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}]\n",
-    ")\n",
-    "\n",
-    "print(response)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Response(id='resp_6837a6f34cbc8191abd297b8f111657d0502e1096ce8403a', created_at=1748477683.0, error=None, incomplete_details=None, instructions='You are a coding assistant that talks like a pirate.', metadata={}, model='gpt-4o-2024-08-06', object='response', output=[ResponseOutputMessage(id='msg_6837a6f40900819194d2bc197b88efb70502e1096ce8403a', content=[ResponseOutputText(annotations=[], text='Arrr, matey! To check if a Python object be an instance of a class, ye can use the `isinstance()` function. Here be how ye do it:\\n\\n```python\\nif isinstance(your_object, YourClass):\\n    print(\"Aye, \\'tis an instance!\")\\nelse:\\n    print(\"Nay, it be not!\")\\n```\\n\\nJust replace `your_object` with yer object and `YourClass` with the class ye be checkin\\'. Yarrr!', type='output_text')], role='assistant', status='completed', type='message')], parallel_tool_calls=True, temperature=1.0, tool_choice='auto', tools=[], top_p=1.0, background=False, max_output_tokens=None, previous_response_id=None, reasoning=Reasoning(effort=None, generate_summary=None, summary=None), service_tier='default', status='completed', text=ResponseTextConfig(format=ResponseFormatText(type='text')), truncation='disabled', usage=ResponseUsage(input_tokens=37, input_tokens_details=InputTokensDetails(cached_tokens=0), output_tokens=99, output_tokens_details=OutputTokensDetails(reasoning_tokens=0), total_tokens=136), user=None, store=True)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "from openai import OpenAI\n",
-    "\n",
-    "client = OpenAI(\n",
-    "    # This is the default and can be omitted\n",
-    "    api_key=os.environ.get(\"OPENAI_API_KEY\"),\n",
-    ")\n",
-    "\n",
-    "response = client.responses.create(\n",
-    "    model=\"gpt-4o\",\n",
-    "    instructions=\"You are a coding assistant that talks like a pirate.\",\n",
-    "    input=\"How do I check if a Python object is an instance of a class?\",\n",
-    ")\n",
-    "\n",
-    "print(response)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ModelResponse(id='chatcmpl-cadd507e-206e-49a6-a903-ed05c3dddec5', created=1748477687, model='claude-3-sonnet-20240229', object='chat.completion', system_fingerprint=None, choices=[Choices(finish_reason='stop', index=0, message=Message(content=\"Hello! As an AI language model, I don't have feelings, but I'm operating properly and ready to assist you with any questions or tasks you may have. How can I help you today?\", role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'citations': None, 'thinking_blocks': None}))], usage=Usage(completion_tokens=43, prompt_tokens=13, total_tokens=56, completion_tokens_details=None, prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=None, cached_tokens=0, text_tokens=None, image_tokens=None), cache_creation_input_tokens=0, cache_read_input_tokens=0))\n"
-     ]
-    }
-   ],
-   "source": [
-    "from litellm import completion\n",
-    "import os\n",
-    "\n",
-    "response = completion(\n",
-    "  model=\"anthropic/claude-3-sonnet-20240229\",\n",
-    "  messages=[{ \"content\": \"Hello, how are you?\",\"role\": \"user\"}],\n",
-    ")\n",
-    "print(response)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "False"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from litellm.utils import supports_function_calling\n",
-    "\n",
-    "# assert supports_function_calling(model=\"gpt-3.5-turbo\") == True\n",
-    "# assert supports_function_calling(model=\"azure/gpt-4-1106-preview\") == True\n",
-    "# assert supports_function_calling(model=\"palm/chat-bison\") == False\n",
-    "# assert supports_function_calling(model=\"xai/grok-2-latest\") == True\n",
-    "supports_function_calling(model=\"anthropic/claude-3.2-sonnet-20240229\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\n",
-      "  \"year\": 2020,\n",
-      "  \"world_series_winner\": \"Los Angeles Dodgers\"\n",
-      "}\n"
-     ]
-    }
-   ],
-   "source": [
-    "from litellm import completion\n",
-    "import os \n",
-    "\n",
-    "response = completion(\n",
-    "  model=\"gpt-4o-mini\",\n",
-    "  response_format={ \"type\": \"json_object\" },\n",
-    "  messages=[\n",
-    "    {\"role\": \"system\", \"content\": \"You are a helpful assistant designed to output JSON.\"},\n",
-    "    {\"role\": \"user\", \"content\": \"Who won the world series in 2020?\"}\n",
-    "  ]\n",
-    ")\n",
-    "print(response.choices[0].message.content)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\u001b[1;31mProvider List: https://docs.litellm.ai/docs/providers\u001b[0m\n",
-      "\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "['stream',\n",
-       " 'stop',\n",
-       " 'temperature',\n",
-       " 'top_p',\n",
-       " 'max_tokens',\n",
-       " 'max_completion_tokens',\n",
-       " 'tools',\n",
-       " 'tool_choice',\n",
-       " 'extra_headers',\n",
-       " 'parallel_tool_calls',\n",
-       " 'response_format',\n",
-       " 'user',\n",
-       " 'reasoning_effort',\n",
-       " 'web_search_options']"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from litellm.utils import get_supported_openai_params\n",
-    "\n",
-    "params = get_supported_openai_params(model=\"anthropic.claude-3\", custom_llm_provider=\"bedrock\")\n",
-    "\n",
-    "params"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "False"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from litellm.utils import supports_response_schema\n",
-    "\n",
-    "supports_response_schema(model=\"anthropic.claude-3\", custom_llm_provider=\"bedrock\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "from litellm import completion \n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "messages = [{\"role\": \"user\", \"content\": \"List 5 important events in the XIX century\"}]\n",
-    "\n",
-    "class CalendarEvent(BaseModel):\n",
-    "  name: str\n",
-    "  date: str\n",
-    "  participants: list[str]\n",
-    "\n",
-    "class EventsList(BaseModel):\n",
-    "    events: list[CalendarEvent]\n",
-    "\n",
-    "resp = completion(\n",
-    "    model=\"gpt-4o-2024-08-06\",\n",
-    "    messages=messages,\n",
-    "    response_format=EventsList\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\"events\":[{\"name\":\"Napoleonic Wars\",\"date\":\"1803-1815\",\"participants\":[\"Napoleon Bonaparte\",\"United Kingdom\",\"Russia\",\"Prussia\",\"Austria\"]},{\"name\":\"The Congress of Vienna\",\"date\":\"1814-1815\",\"participants\":[\"European Powers\",\"Metternich\",\"Talleyrand\",\"Castlereagh\"]},{\"name\":\"The Industrial Revolution\",\"date\":\"Approx. 1760-1840\",\"participants\":[\"United Kingdom\",\"Europe\",\"United States\"]},{\"name\":\"American Civil War\",\"date\":\"1861-1865\",\"participants\":[\"Union (North)\",\"Confederacy (South)\"]},{\"name\":\"Women's suffrage movement\",\"date\":\"Late 19th century\",\"participants\":[\"Suffragettes\",\"United Kingdom\",\"United States\",\"Women's Rights Activists\"]}]}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(resp.choices[0].message.content)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Received=ModelResponse(id='chatcmpl-BcLD25Y7btqLT3q7xbJF64aRjDpdU', created=1748477692, model='gpt-4o-2024-08-06', object='chat.completion', system_fingerprint='fp_9bddfca6e2', choices=[Choices(finish_reason='stop', index=0, message=Message(content='{\"name\":\"Science Fair\",\"date\":\"Friday\",\"participants\":[\"Alice\",\"Bob\"]}', role='assistant', tool_calls=None, function_call=None, provider_specific_fields={'refusal': None}, annotations=[]))], usage=Usage(completion_tokens=17, prompt_tokens=92, total_tokens=109, completion_tokens_details=CompletionTokensDetailsWrapper(accepted_prediction_tokens=0, audio_tokens=0, reasoning_tokens=0, rejected_prediction_tokens=0, text_tokens=None), prompt_tokens_details=PromptTokensDetailsWrapper(audio_tokens=0, cached_tokens=0, text_tokens=None, image_tokens=None)), service_tier='default')\n"
-     ]
-    }
-   ],
-   "source": [
-    "# !gcloud auth application-default login - run this to add vertex credentials to your env\n",
-    "import litellm, os\n",
-    "from litellm import completion \n",
-    "from pydantic import BaseModel \n",
-    "\n",
-    "\n",
-    "messages=[\n",
-    "        {\"role\": \"system\", \"content\": \"Extract the event information.\"},\n",
-    "        {\"role\": \"user\", \"content\": \"Alice and Bob are going to a science fair on Friday.\"},\n",
-    "    ]\n",
-    "\n",
-    "litellm.enable_json_schema_validation = True\n",
-    "os.environ[\"LITELLM_LOG\"] = \"debug\"\n",
-    "\n",
-    "class CalendarEvent(BaseModel):\n",
-    "  name: str\n",
-    "  date: str\n",
-    "  participants: list[str]\n",
-    "\n",
-    "resp = completion(\n",
-    "    model=\"openai/gpt-4o\",\n",
-    "    messages=messages,\n",
-    "    response_format=CalendarEvent,\n",
-    ")\n",
-    "\n",
-    "print(\"Received={}\".format(resp))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\"name\":\"Science Fair\",\"date\":\"Friday\",\"participants\":[\"Alice\",\"Bob\"]}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(resp.choices[0].message.content)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "CalendarEvent(name='Science Fair', date='Friday', participants=['Alice', 'Bob'])"
-      ]
-     },
-     "execution_count": 12,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "CalendarEvent.model_validate_json(resp.choices[0].message.content)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Models Under Test for Structured Outputs\n",
-    "\n",
-    "- openai\n",
-    "- Anthropic\n",
-    "- Gemini\n",
-    "- Ollama"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "sonnet35 = \"anthropic/claude-3-5-sonnet-20240620\"\n",
-    "sonnet37 = \"anthropic/claude-3-7-sonnet-20250219\"\n",
-    "gemini25_flash = \"gemini/gemini-2.5-flash-preview-04-17\"\n",
-    "ollama_qwen3 = \"ollama/qwen3\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "JSONSchemaValidationError",
-     "evalue": "litellm.JSONSchemaValidationError: model=, returned an invalid response=<think>\nOkay, the user is asking for five important events in the 19th century. Let me start by recalling the key events of that period. The 19th century spans from 1801 to 1900, so I need to pick events that had significant global impact.\n\nFirst, the French Revolution was in the late 18th century, so maybe the Napoleonic Wars would be a good start. Those wars from 1803 to 1815 were major and affected many European countries. Then, the Industrial Revolution is a big one. It started in the late 18th century but continued into the 19th, so including the 1830s or 1840s would make sense.\n\nThe American Civil War is another key event, happening in the 1860s. It's a major conflict with lasting effects. Then, the unification of Germany in 1871 is important, as it was a significant political change. Finally, the abolition of slavery in the British Empire in 1833 comes to mind, though I should check the exact date. Wait, the British abolished slavery in 1833, but the US did it earlier in 1865. Maybe the user wants the British one since it's a global event. Alternatively, the Mexican-American War in 1846-1848 could be another, but I think the abolition is more impactful. Let me verify the dates to ensure accuracy. Also, considering the user might be looking for a mix of political, social, and technological events. I should also think about other events like the abolition of serfdom in Russia in 1861 or the colonization of Africa, but those might not be as universally significant. Let me list the five as: Napoleonic Wars, Industrial Revolution, American Civil War, German Unification, and British Abolition of Slavery. That should cover different regions and aspects. Need to make sure each event is clearly dated and explained briefly.\n</think>\n\nHere are five significant events of the 19th century (1801–1900):\n\n1. **Napoleonic Wars (1803–1815)**  \n   A series of conflicts involving Napoleon Bonaparte's French Empire, reshaping European borders, spreading revolutionary ideas, and leading to the rise of nationalism and the Congress of Vienna (1815).\n\n2. **Industrial Revolution (18th–19th centuries)**  \n   A transformative period of technological and economic change, starting in Britain in the late 18th century but accelerating in the 19th. It revolutionized manufacturing, transportation, and urbanization, laying the groundwork for modern industrial societies.\n\n3. **American Civil War (1861–1865)**  \n   A conflict between the Union (Northern states) and the Confederacy (Southern states) over slavery, states' rights, and economic systems. The Union's victory led to the abolition of slavery in the U.S. via the 13th Amendment (1865).\n\n4. **Unification of Germany (1871)**  \n   Otto von Bismarck's political maneuvering culminated in the creation of the German Empire after the Franco-Prussian War (1870–1871), marking the end of fragmented German states and the rise of a centralized European power.\n\n5. **Abolition of Slavery in the British Empire (1833)**  \n   The Slavery Abolition Act (1833) formally ended slavery in most British colonies, though it did not immediately free enslaved people in all regions. This marked a pivotal step in global human rights movements.\n\nThese events shaped modern politics, economies, and societies, with lasting impacts into the 20th century., for schema={\"$defs\": {\"Event\": {\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"date\": {\"title\": \"Date\", \"type\": \"string\"}, \"participants\": {\"items\": {\"type\": \"string\"}, \"title\": \"Participants\", \"type\": \"array\"}}, \"required\": [\"name\", \"date\", \"participants\"], \"title\": \"Event\", \"type\": \"object\", \"additionalProperties\": false}}, \"properties\": {\"events\": {\"items\": {\"$ref\": \"#/$defs/Event\"}, \"title\": \"Events\", \"type\": \"array\"}}, \"required\": [\"events\"], \"title\": \"EventsList\", \"type\": \"object\", \"additionalProperties\": false}.\nAccess raw response with `e.raw_response`",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mJSONDecodeError\u001b[39m                           Traceback (most recent call last)",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/litellm/litellm_core_utils/json_validation_rule.py:17\u001b[39m, in \u001b[36mvalidate_schema\u001b[39m\u001b[34m(schema, response)\u001b[39m\n\u001b[32m     16\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m---> \u001b[39m\u001b[32m17\u001b[39m     response_dict = \u001b[43mjson\u001b[49m\u001b[43m.\u001b[49m\u001b[43mloads\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m     18\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m json.JSONDecodeError:\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/json/__init__.py:346\u001b[39m, in \u001b[36mloads\u001b[39m\u001b[34m(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)\u001b[39m\n\u001b[32m    343\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m (\u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[32m    344\u001b[39m         parse_int \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m parse_float \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m\n\u001b[32m    345\u001b[39m         parse_constant \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m object_pairs_hook \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kw):\n\u001b[32m--> \u001b[39m\u001b[32m346\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_default_decoder\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdecode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    347\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcls\u001b[39m \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/json/decoder.py:338\u001b[39m, in \u001b[36mJSONDecoder.decode\u001b[39m\u001b[34m(self, s, _w)\u001b[39m\n\u001b[32m    334\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Return the Python representation of ``s`` (a ``str`` instance\u001b[39;00m\n\u001b[32m    335\u001b[39m \u001b[33;03mcontaining a JSON document).\u001b[39;00m\n\u001b[32m    336\u001b[39m \n\u001b[32m    337\u001b[39m \u001b[33;03m\"\"\"\u001b[39;00m\n\u001b[32m--> \u001b[39m\u001b[32m338\u001b[39m obj, end = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mraw_decode\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43midx\u001b[49m\u001b[43m=\u001b[49m\u001b[43m_w\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[32;43m0\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m.\u001b[49m\u001b[43mend\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    339\u001b[39m end = _w(s, end).end()\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/.local/share/uv/python/cpython-3.12.8-macos-aarch64-none/lib/python3.12/json/decoder.py:356\u001b[39m, in \u001b[36mJSONDecoder.raw_decode\u001b[39m\u001b[34m(self, s, idx)\u001b[39m\n\u001b[32m    355\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mStopIteration\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m err:\n\u001b[32m--> \u001b[39m\u001b[32m356\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m JSONDecodeError(\u001b[33m\"\u001b[39m\u001b[33mExpecting value\u001b[39m\u001b[33m\"\u001b[39m, s, err.value) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m    357\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m obj, end\n",
-      "\u001b[31mJSONDecodeError\u001b[39m: Expecting value: line 1 column 1 (char 0)",
-      "\nDuring handling of the above exception, another exception occurred:\n",
-      "\u001b[31mJSONSchemaValidationError\u001b[39m                 Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[25]\u001b[39m\u001b[32m, line 12\u001b[39m\n\u001b[32m      9\u001b[39m \u001b[38;5;28;01mclass\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mEventsList\u001b[39;00m(BaseModel):\n\u001b[32m     10\u001b[39m     events: \u001b[38;5;28mlist\u001b[39m[Event]\n\u001b[32m---> \u001b[39m\u001b[32m12\u001b[39m response = \u001b[43mcompletion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m     13\u001b[39m \u001b[43m  \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mollama_qwen3\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m     14\u001b[39m \u001b[43m  \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrole\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mList 5 important events in the XIX century\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m     15\u001b[39m \u001b[43m  \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m=\u001b[49m\u001b[43mEventsList\u001b[49m\n\u001b[32m     16\u001b[39m \u001b[43m)\u001b[49m\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/litellm/utils.py:1283\u001b[39m, in \u001b[36mclient.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m   1279\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m logging_obj:\n\u001b[32m   1280\u001b[39m     logging_obj.failure_handler(\n\u001b[32m   1281\u001b[39m         e, traceback_exception, start_time, end_time\n\u001b[32m   1282\u001b[39m     )  \u001b[38;5;66;03m# DO NOT MAKE THREADED - router retry fallback relies on this!\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1283\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m e\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/litellm/utils.py:1199\u001b[39m, in \u001b[36mclient.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m   1196\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m result\n\u001b[32m   1198\u001b[39m \u001b[38;5;66;03m### POST-CALL RULES ###\u001b[39;00m\n\u001b[32m-> \u001b[39m\u001b[32m1199\u001b[39m \u001b[43mpost_call_processing\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m   1200\u001b[39m \u001b[43m    \u001b[49m\u001b[43moriginal_response\u001b[49m\u001b[43m=\u001b[49m\u001b[43mresult\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1201\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m   1202\u001b[39m \u001b[43m    \u001b[49m\u001b[43moptional_params\u001b[49m\u001b[43m=\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m   1203\u001b[39m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m   1205\u001b[39m \u001b[38;5;66;03m# [OPTIONAL] ADD TO CACHE\u001b[39;00m\n\u001b[32m   1206\u001b[39m _llm_caching_handler.sync_set_cache(\n\u001b[32m   1207\u001b[39m     result=result,\n\u001b[32m   1208\u001b[39m     args=args,\n\u001b[32m   1209\u001b[39m     kwargs=kwargs,\n\u001b[32m   1210\u001b[39m )\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/litellm/utils.py:999\u001b[39m, in \u001b[36mclient.<locals>.post_call_processing\u001b[39m\u001b[34m(original_response, model, optional_params)\u001b[39m\n\u001b[32m    991\u001b[39m                             litellm.litellm_core_utils.json_validation_rule.validate_schema(\n\u001b[32m    992\u001b[39m                                 schema=optional_params[\u001b[33m\"\u001b[39m\u001b[33mresponse_format\u001b[39m\u001b[33m\"\u001b[39m][\n\u001b[32m    993\u001b[39m                                     \u001b[33m\"\u001b[39m\u001b[33mresponse_schema\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m    994\u001b[39m                                 ],\n\u001b[32m    995\u001b[39m                                 response=model_response,\n\u001b[32m    996\u001b[39m                             )\n\u001b[32m    998\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[32m--> \u001b[39m\u001b[32m999\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m e\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/litellm/utils.py:958\u001b[39m, in \u001b[36mclient.<locals>.post_call_processing\u001b[39m\u001b[34m(original_response, model, optional_params)\u001b[39m\n\u001b[32m    950\u001b[39m             json_response_format = (\n\u001b[32m    951\u001b[39m                 type_to_response_format_param(\n\u001b[32m    952\u001b[39m                     response_format=optional_params[\n\u001b[32m   (...)\u001b[39m\u001b[32m    955\u001b[39m                 )\n\u001b[32m    956\u001b[39m             )\n\u001b[32m    957\u001b[39m         \u001b[38;5;28;01mif\u001b[39;00m json_response_format \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m958\u001b[39m             \u001b[43mlitellm\u001b[49m\u001b[43m.\u001b[49m\u001b[43mlitellm_core_utils\u001b[49m\u001b[43m.\u001b[49m\u001b[43mjson_validation_rule\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalidate_schema\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    959\u001b[39m \u001b[43m                \u001b[49m\u001b[43mschema\u001b[49m\u001b[43m=\u001b[49m\u001b[43mjson_response_format\u001b[49m\u001b[43m[\u001b[49m\n\u001b[32m    960\u001b[39m \u001b[43m                    \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mjson_schema\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\n\u001b[32m    961\u001b[39m \u001b[43m                \u001b[49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mschema\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    962\u001b[39m \u001b[43m                \u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmodel_response\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    963\u001b[39m \u001b[43m            \u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m    964\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m:\n\u001b[32m    965\u001b[39m     \u001b[38;5;28;01mpass\u001b[39;00m\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/litellm/litellm_core_utils/json_validation_rule.py:19\u001b[39m, in \u001b[36mvalidate_schema\u001b[39m\u001b[34m(schema, response)\u001b[39m\n\u001b[32m     17\u001b[39m     response_dict = json.loads(response)\n\u001b[32m     18\u001b[39m \u001b[38;5;28;01mexcept\u001b[39;00m json.JSONDecodeError:\n\u001b[32m---> \u001b[39m\u001b[32m19\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m JSONSchemaValidationError(\n\u001b[32m     20\u001b[39m         model=\u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m, llm_provider=\u001b[33m\"\u001b[39m\u001b[33m\"\u001b[39m, raw_response=response, schema=json.dumps(schema)\n\u001b[32m     21\u001b[39m     )\n\u001b[32m     23\u001b[39m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[32m     24\u001b[39m     validate(response_dict, schema=schema)\n",
-      "\u001b[31mJSONSchemaValidationError\u001b[39m: litellm.JSONSchemaValidationError: model=, returned an invalid response=<think>\nOkay, the user is asking for five important events in the 19th century. Let me start by recalling the key events of that period. The 19th century spans from 1801 to 1900, so I need to pick events that had significant global impact.\n\nFirst, the French Revolution was in the late 18th century, so maybe the Napoleonic Wars would be a good start. Those wars from 1803 to 1815 were major and affected many European countries. Then, the Industrial Revolution is a big one. It started in the late 18th century but continued into the 19th, so including the 1830s or 1840s would make sense.\n\nThe American Civil War is another key event, happening in the 1860s. It's a major conflict with lasting effects. Then, the unification of Germany in 1871 is important, as it was a significant political change. Finally, the abolition of slavery in the British Empire in 1833 comes to mind, though I should check the exact date. Wait, the British abolished slavery in 1833, but the US did it earlier in 1865. Maybe the user wants the British one since it's a global event. Alternatively, the Mexican-American War in 1846-1848 could be another, but I think the abolition is more impactful. Let me verify the dates to ensure accuracy. Also, considering the user might be looking for a mix of political, social, and technological events. I should also think about other events like the abolition of serfdom in Russia in 1861 or the colonization of Africa, but those might not be as universally significant. Let me list the five as: Napoleonic Wars, Industrial Revolution, American Civil War, German Unification, and British Abolition of Slavery. That should cover different regions and aspects. Need to make sure each event is clearly dated and explained briefly.\n</think>\n\nHere are five significant events of the 19th century (1801–1900):\n\n1. **Napoleonic Wars (1803–1815)**  \n   A series of conflicts involving Napoleon Bonaparte's French Empire, reshaping European borders, spreading revolutionary ideas, and leading to the rise of nationalism and the Congress of Vienna (1815).\n\n2. **Industrial Revolution (18th–19th centuries)**  \n   A transformative period of technological and economic change, starting in Britain in the late 18th century but accelerating in the 19th. It revolutionized manufacturing, transportation, and urbanization, laying the groundwork for modern industrial societies.\n\n3. **American Civil War (1861–1865)**  \n   A conflict between the Union (Northern states) and the Confederacy (Southern states) over slavery, states' rights, and economic systems. The Union's victory led to the abolition of slavery in the U.S. via the 13th Amendment (1865).\n\n4. **Unification of Germany (1871)**  \n   Otto von Bismarck's political maneuvering culminated in the creation of the German Empire after the Franco-Prussian War (1870–1871), marking the end of fragmented German states and the rise of a centralized European power.\n\n5. **Abolition of Slavery in the British Empire (1833)**  \n   The Slavery Abolition Act (1833) formally ended slavery in most British colonies, though it did not immediately free enslaved people in all regions. This marked a pivotal step in global human rights movements.\n\nThese events shaped modern politics, economies, and societies, with lasting impacts into the 20th century., for schema={\"$defs\": {\"Event\": {\"properties\": {\"name\": {\"title\": \"Name\", \"type\": \"string\"}, \"date\": {\"title\": \"Date\", \"type\": \"string\"}, \"participants\": {\"items\": {\"type\": \"string\"}, \"title\": \"Participants\", \"type\": \"array\"}}, \"required\": [\"name\", \"date\", \"participants\"], \"title\": \"Event\", \"type\": \"object\", \"additionalProperties\": false}}, \"properties\": {\"events\": {\"items\": {\"$ref\": \"#/$defs/Event\"}, \"title\": \"Events\", \"type\": \"array\"}}, \"required\": [\"events\"], \"title\": \"EventsList\", \"type\": \"object\", \"additionalProperties\": false}.\nAccess raw response with `e.raw_response`"
-     ]
-    }
-   ],
-   "source": [
-    "from litellm import completion\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "class Event(BaseModel):\n",
-    "    name: str\n",
-    "    date: str\n",
-    "    participants: list[str]\n",
-    "\n",
-    "class EventsList(BaseModel):\n",
-    "    events: list[Event]\n",
-    "\n",
-    "response = completion(\n",
-    "  model=ollama_qwen3,\n",
-    "  messages=[{\"role\": \"user\", \"content\": \"List 5 important events in the XIX century\"}],\n",
-    "  response_format=EventsList\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "name='Louisiana Purchase' date='1803' participants=['United States', 'France']\n",
-      "name='Napoleonic Wars end (Battle of Waterloo)' date='1815' participants=['France', 'United Kingdom', 'Prussia', 'Russia', 'Austria']\n",
-      "name='Unification of Italy' date='1861' participants=['Sardinia', 'France', 'Austria', 'Kingdom of Two Sicilies']\n",
-      "name='American Civil War' date='1861-1865' participants=['Union', 'Confederacy']\n",
-      "name='Scramble for Africa begins (Berlin Conference)' date='1884-1885' participants=['European powers']\n"
-     ]
-    }
-   ],
-   "source": [
-    "for e in EventsList.model_validate_json(response.choices[0].message.content).events:\n",
-    "  print(e)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 30,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "False"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "from litellm.utils import supports_response_schema\n",
-    "\n",
-    "supports_response_schema(model=ollama_qwen3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "os.environ[\"LITELLM_LOG\"] = \"debug\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from litellm import completion\n",
-    "from pydantic import BaseModel\n",
-    "\n",
-    "# Configure for Ollama\n",
-    "response = completion(\n",
-    "    model=ollama_qwen3,  # Use ollama_chat/ prefix for chat endpoint\n",
-    "    messages=[{\n",
-    "        \"role\": \"user\",\n",
-    "        \"content\": \"Describe the benefits of exercise\"\n",
-    "    }],\n",
-    "    api_base=\"http://localhost:11434\",  # Ollama server URL\n",
-    "    response_format={\n",
-    "        \"type\": \"json_schema\",\n",
-    "        \"json_schema\": {\n",
-    "            \"schema\": {\n",
-    "                \"type\": \"object\",\n",
-    "                \"properties\": {\n",
-    "                    \"benefits\": {\n",
-    "                        \"type\": \"array\",\n",
-    "                        \"items\": {\"type\": \"string\"}\n",
-    "                    },\n",
-    "                    \"recommendation\": {\"type\": \"string\"}\n",
-    "                },\n",
-    "                \"required\": [\"benefits\", \"recommendation\"]\n",
-    "            }\n",
-    "        }\n",
-    "    }\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<think>\n",
-      "Okay, the user wants me to describe the benefits of exercise. Let me start by recalling all the main areas where exercise has positive effects. First, physical health comes to mind. Exercise improves cardiovascular health, strengthens muscles and bones, helps with weight management, and boosts the immune system. Oh, and it can also help with chronic conditions like diabetes or hypertension.\n",
-      "\n",
-      "Then there's mental health. I remember that exercise releases endorphins, which can reduce stress and anxiety. It might also improve mood and help with depression. Maybe I should mention the role of exercise in cognitive function, like enhancing memory and concentration.\n",
-      "\n",
-      "Social benefits are another aspect. Group activities or team sports can foster social connections and a sense of community. That's important for mental well-being too.\n",
-      "\n",
-      "Longevity and quality of life are key points. Regular exercise can increase lifespan and help maintain independence as people age. It might also reduce the risk of certain diseases, like some cancers.\n",
-      "\n",
-      "I should also think about the psychological benefits, such as increased self-esteem and confidence. Maybe touch on how exercise can improve sleep quality, which ties back to both physical and mental health.\n",
-      "\n",
-      "Wait, did I cover all the main areas? Let me check. Physical, mental, social, longevity, and maybe even some aspects like creativity or productivity? I should make sure each point is clear and concise. Also, need to present them in a logical order. Maybe start with physical, then mental, social, and then the broader impacts like longevity and quality of life. Avoid being too technical, keep it accessible. Oh, and maybe include something about how exercise can be tailored to different age groups or abilities. But the user didn't specify, so maybe keep it general. Alright, I think that covers the main points. Let me structure this into a coherent answer now.\n",
-      "</think>\n",
-      "\n",
-      "Exercise offers a wide range of benefits for both physical and mental health, as well as overall quality of life. Here are some key advantages:\n",
-      "\n",
-      "### **1. Physical Health Benefits**  \n",
-      "- **Cardiovascular Health**: Improves heart and lung function, reduces the risk of heart disease, stroke, and high blood pressure.  \n",
-      "- **Muscle and Bone Strength**: Builds muscle mass, enhances bone density, and prevents conditions like osteoporosis.  \n",
-      "- **Weight Management**: Helps burn calories, maintain a healthy weight, and reduce the risk of obesity-related diseases (e.g., diabetes, sleep apnea).  \n",
-      "- **Immune System Boost**: Strengthens the immune system, reducing the risk of chronic illnesses and infections.  \n",
-      "- **Chronic Disease Prevention**: Lowers the risk of conditions like type 2 diabetes, certain cancers, and metabolic syndrome.  \n",
-      "\n",
-      "### **2. Mental and Emotional Benefits**  \n",
-      "- **Stress Reduction**: Releases endorphins, which act as natural mood lifters, and reduces stress hormones like cortisol.  \n",
-      "- **Improved Mood**: Alleviates symptoms of depression and anxiety, promoting a sense of well-being.  \n",
-      "- **Cognitive Function**: Enhances memory, concentration, and problem-solving skills, while reducing the risk of age-related cognitive decline (e.g., dementia).  \n",
-      "- **Better Sleep**: Regulates sleep patterns and improves sleep quality, which is essential for recovery and mental clarity.  \n",
-      "\n",
-      "### **3. Social and Psychological Benefits**  \n",
-      "- **Increased Self-Esteem**: Achieving fitness goals fosters confidence and a positive self-image.  \n",
-      "- **Social Connection**: Group activities, team sports, or exercise classes build community and reduce feelings of isolation.  \n",
-      "- **Resilience and Coping Skills**: Regular physical activity enhances emotional resilience and helps manage life’s challenges.  \n",
-      "\n",
-      "### **4. Longevity and Quality of Life**  \n",
-      "- **Extended Lifespan**: Regular exercise is linked to a longer, healthier life by reducing the risk of chronic diseases and improving overall vitality.  \n",
-      "- **Mobility and Independence**: Maintains physical function and flexibility, especially in older adults, enabling independent living.  \n",
-      "- **Energy Levels**: Boosts stamina and reduces fatigue, making daily tasks easier to manage.  \n",
-      "\n",
-      "### **5. Additional Benefits**  \n",
-      "- **Creativity and Productivity**: Physical activity can enhance focus, creativity, and productivity by improving brain function.  \n",
-      "- **Flexibility and Balance**: Reduces the risk of injuries and improves coordination, particularly in older adults.  \n",
-      "\n",
-      "In summary, exercise is a powerful tool for enhancing both physical and mental well-being, fostering resilience, and improving overall quality of life. Incorporating regular physical activity into daily routines can lead to long-term health benefits and a more balanced, fulfilling lifestyle.\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(response.choices[0].message.content)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from litellm import completion\n",
-    "\n",
-    "response = completion(\n",
-    "    model=ollama_qwen3,\n",
-    "    messages=[\n",
-    "        {\n",
-    "            \"role\": \"user\",\n",
-    "            \"content\": \"respond in json, what's the weather in san francisco\"\n",
-    "        }\n",
-    "    ],\n",
-    "    format=\"json\"  # Basic JSON mode\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\"error\": \"true\", \"message\": \"Please provide a valid API key for the weather service.\"}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(response.choices[0].message.content)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from litellm import completion\n",
-    "\n",
-    "response = completion(\n",
-    "    model=ollama_qwen3,\n",
-    "    messages=[{\n",
-    "        \"content\": \"respond in 20 words. who are you? respond in json\",\n",
-    "        \"role\": \"user\"\n",
-    "    }],\n",
-    "    response_format={\n",
-    "        \"type\": \"json_schema\", \n",
-    "        \"json_schema\": {\n",
-    "            \"schema\": {\n",
-    "                \"type\": \"object\", \n",
-    "                \"properties\": {\n",
-    "                    \"name\": {\"type\": \"string\"}\n",
-    "                }\n",
-    "            }\n",
-    "        }\n",
-    "    }\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<think>\n",
-      "Okay, the user asked \"who are you?\" and wants a 20-word response in JSON. Let me break this down. First, I need to identify who I am. I'm Qwen, a large language model developed by Alibaba Cloud. The user wants the answer concise, exactly 20 words, and in JSON format.\n",
-      "\n",
-      "So, I should structure the JSON with a key like \"response\" and the value being the 20-word summary. Let me count the words. \"I am Qwen, a large language model developed by Alibaba Cloud. I assist with information and tasks.\" That's 14 words. Need six more. Maybe add \"I provide answers, explanations, and support across various topics.\" Now that's 20. Let me check the word count again. Yes, exactly 20. Now format it into JSON. Make sure the syntax is correct with quotes and commas. Avoid any markdown. The user might be testing if I can follow strict instructions, so accuracy is key. Also, ensure the JSON is valid. Alright, that should do it.\n",
-      "</think>\n",
-      "\n",
-      "{\"response\": \"I am Qwen, a large language model developed by Alibaba Cloud. I assist with information and tasks.\"}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(response.choices[0].message.content)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 21,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\n",
-      "  \"name\": \"Harry Potter\",\n",
-      "  \"age\": 11,\n",
-      "  \"facts\": [\n",
-      "    \"Harry is a wizard who discovers he is a wizard at the age of eleven.\",\n",
-      "    \"He attends Hogwarts School of Witchcraft and Wizardry.\",\n",
-      "    \"He is known as 'The Boy Who Lived' because he survived an attack by the dark wizard Voldemort when he was a baby.\",\n",
-      "    \"He has a scar on his forehead from the attack.\",\n",
-      "    \"He is friends with Ron Weasley and Hermione Granger.\",\n",
-      "    \"He is the son of James and Lily Potter.\",\n",
-      "    \"He is the main character in the Harry Potter series of books and films.\"\n",
-      "  ]\n",
-      "}\n"
-     ]
-    }
-   ],
-   "source": [
-    "from openai import OpenAI\n",
-    "from pydantic import BaseModel\n",
-    "import instructor\n",
-    "\n",
-    "# Set up client with Ollama through LiteLLM proxy\n",
-    "client = instructor.from_openai(\n",
-    "    OpenAI(\n",
-    "        base_url=\"http://localhost:11434/v1\",  # Ollama endpoint\n",
-    "        api_key=\"ollama\",  # Required but unused\n",
-    "    ),\n",
-    "    mode=instructor.Mode.JSON,\n",
-    ")\n",
-    "\n",
-    "class Character(BaseModel):\n",
-    "    name: str\n",
-    "    age: int\n",
-    "    facts: list[str]\n",
-    "\n",
-    "response = client.chat.completions.create(\n",
-    "    model=\"qwen3\",\n",
-    "    messages=[\n",
-    "        {\n",
-    "            \"role\": \"user\",\n",
-    "            \"content\": \"Tell me about Harry Potter\"\n",
-    "        }\n",
-    "    ],\n",
-    "    response_model=Character,\n",
-    ")\n",
-    "\n",
-    "print(response.model_dump_json(indent=2))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 22,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "TypeError",
-     "evalue": "OpenAI.__init__() got an unexpected keyword argument 'model'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mTypeError\u001b[39m                                 Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[22]\u001b[39m\u001b[32m, line 3\u001b[39m\n\u001b[32m      1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01mlitellm\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m OpenAI\n\u001b[32m----> \u001b[39m\u001b[32m3\u001b[39m client = \u001b[43mOpenAI\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m      4\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[43mollama_qwen3\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m      5\u001b[39m \u001b[43m)\u001b[49m\n\u001b[32m      7\u001b[39m response = client.chat.completions.create(\n\u001b[32m      8\u001b[39m     messages=[{\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33muser\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mcontent\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33mTell me about Harry Potter\u001b[39m\u001b[33m\"\u001b[39m}],\n\u001b[32m      9\u001b[39m     response_model=Character,\n\u001b[32m     10\u001b[39m )\n\u001b[32m     12\u001b[39m \u001b[38;5;28mprint\u001b[39m(response.model_dump_json(indent=\u001b[32m2\u001b[39m))\n",
-      "\u001b[31mTypeError\u001b[39m: OpenAI.__init__() got an unexpected keyword argument 'model'"
-     ]
-    }
-   ],
-   "source": [
-    "from litellm import OpenAI\n",
-    "\n",
-    "client = OpenAI(\n",
-    "    model=ollama_qwen3,\n",
-    ")\n",
-    "\n",
-    "response = client.chat.completions.create(\n",
-    "    messages=[{\"role\": \"user\", \"content\": \"Tell me about Harry Potter\"}],\n",
-    "    response_model=Character,\n",
-    ")\n",
-    "\n",
-    "print(response.model_dump_json(indent=2))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 23,
-   "metadata": {},
-   "outputs": [
-    {
-     "ename": "NotFoundError",
-     "evalue": "Error code: 404 - {'error': {'message': 'The model `claude-3-5-sonnet-20241022` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[31m---------------------------------------------------------------------------\u001b[39m",
-      "\u001b[31mNotFoundError\u001b[39m                             Traceback (most recent call last)",
-      "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[23]\u001b[39m\u001b[32m, line 13\u001b[39m\n\u001b[32m      7\u001b[39m response = client.chat.completions.create(\n\u001b[32m      8\u001b[39m     model=\u001b[33m\"\u001b[39m\u001b[33mgpt-4o\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m      9\u001b[39m     messages=[{\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33muser\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mcontent\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33mWrite a haiku\u001b[39m\u001b[33m\"\u001b[39m}]\n\u001b[32m     10\u001b[39m )\n\u001b[32m     12\u001b[39m \u001b[38;5;66;03m# Claude example (same client!)\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m13\u001b[39m response = \u001b[43mclient\u001b[49m\u001b[43m.\u001b[49m\u001b[43mchat\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcompletions\u001b[49m\u001b[43m.\u001b[49m\u001b[43mcreate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m     14\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m=\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mclaude-3-5-sonnet-20241022\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m     15\u001b[39m \u001b[43m    \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m=\u001b[49m\u001b[43m[\u001b[49m\u001b[43m{\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mrole\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcontent\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mWrite a haiku\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m]\u001b[49m\n\u001b[32m     16\u001b[39m \u001b[43m)\u001b[49m\n\u001b[32m     18\u001b[39m \u001b[38;5;66;03m# Gemini example (same client!)\u001b[39;00m\n\u001b[32m     19\u001b[39m response = client.chat.completions.create(\n\u001b[32m     20\u001b[39m     model=\u001b[33m\"\u001b[39m\u001b[33mgemini/gemini-1.5-pro\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m     21\u001b[39m     messages=[{\u001b[33m\"\u001b[39m\u001b[33mrole\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33muser\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mcontent\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33mWrite a haiku\u001b[39m\u001b[33m\"\u001b[39m}]\n\u001b[32m     22\u001b[39m )\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/openai/_utils/_utils.py:287\u001b[39m, in \u001b[36mrequired_args.<locals>.inner.<locals>.wrapper\u001b[39m\u001b[34m(*args, **kwargs)\u001b[39m\n\u001b[32m    285\u001b[39m             msg = \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[32m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m\n\u001b[32m    286\u001b[39m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[32m--> \u001b[39m\u001b[32m287\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[43m*\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m*\u001b[49m\u001b[43m*\u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/openai/resources/chat/completions/completions.py:925\u001b[39m, in \u001b[36mCompletions.create\u001b[39m\u001b[34m(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, reasoning_effort, response_format, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, web_search_options, extra_headers, extra_query, extra_body, timeout)\u001b[39m\n\u001b[32m    882\u001b[39m \u001b[38;5;129m@required_args\u001b[39m([\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mmodel\u001b[39m\u001b[33m\"\u001b[39m], [\u001b[33m\"\u001b[39m\u001b[33mmessages\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mmodel\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mstream\u001b[39m\u001b[33m\"\u001b[39m])\n\u001b[32m    883\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mcreate\u001b[39m(\n\u001b[32m    884\u001b[39m     \u001b[38;5;28mself\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m    922\u001b[39m     timeout: \u001b[38;5;28mfloat\u001b[39m | httpx.Timeout | \u001b[38;5;28;01mNone\u001b[39;00m | NotGiven = NOT_GIVEN,\n\u001b[32m    923\u001b[39m ) -> ChatCompletion | Stream[ChatCompletionChunk]:\n\u001b[32m    924\u001b[39m     validate_response_format(response_format)\n\u001b[32m--> \u001b[39m\u001b[32m925\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    926\u001b[39m \u001b[43m        \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43m/chat/completions\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[32m    927\u001b[39m \u001b[43m        \u001b[49m\u001b[43mbody\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    928\u001b[39m \u001b[43m            \u001b[49m\u001b[43m{\u001b[49m\n\u001b[32m    929\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmessages\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    930\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmodel\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    931\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43maudio\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43maudio\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    932\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfrequency_penalty\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mfrequency_penalty\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    933\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfunction_call\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunction_call\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    934\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mfunctions\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunctions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    935\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlogit_bias\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogit_bias\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    936\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mlogprobs\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    937\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmax_completion_tokens\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_completion_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    938\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmax_tokens\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmax_tokens\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    939\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmetadata\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    940\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mmodalities\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodalities\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    941\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mn\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mn\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    942\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mparallel_tool_calls\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mparallel_tool_calls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    943\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mprediction\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mprediction\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    944\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mpresence_penalty\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mpresence_penalty\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    945\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mreasoning_effort\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mreasoning_effort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    946\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mresponse_format\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    947\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mseed\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    948\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mservice_tier\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mservice_tier\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    949\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstop\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstop\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    950\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstore\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstore\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    951\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstream\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    952\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mstream_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    953\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtemperature\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtemperature\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    954\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtool_choice\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    955\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtools\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    956\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtop_logprobs\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_logprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    957\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mtop_p\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtop_p\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    958\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43muser\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43muser\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    959\u001b[39m \u001b[43m                \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mweb_search_options\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mweb_search_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    960\u001b[39m \u001b[43m            \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    961\u001b[39m \u001b[43m            \u001b[49m\u001b[43mcompletion_create_params\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCompletionCreateParamsStreaming\u001b[49m\n\u001b[32m    962\u001b[39m \u001b[43m            \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\n\u001b[32m    963\u001b[39m \u001b[43m            \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mcompletion_create_params\u001b[49m\u001b[43m.\u001b[49m\u001b[43mCompletionCreateParamsNonStreaming\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    964\u001b[39m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    965\u001b[39m \u001b[43m        \u001b[49m\u001b[43moptions\u001b[49m\u001b[43m=\u001b[49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[32m    966\u001b[39m \u001b[43m            \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[43m=\u001b[49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m=\u001b[49m\u001b[43mtimeout\u001b[49m\n\u001b[32m    967\u001b[39m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    968\u001b[39m \u001b[43m        \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m=\u001b[49m\u001b[43mChatCompletion\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    969\u001b[39m \u001b[43m        \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[32m    970\u001b[39m \u001b[43m        \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mChatCompletionChunk\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[32m    971\u001b[39m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/openai/_base_client.py:1239\u001b[39m, in \u001b[36mSyncAPIClient.post\u001b[39m\u001b[34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[39m\n\u001b[32m   1225\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mpost\u001b[39m(\n\u001b[32m   1226\u001b[39m     \u001b[38;5;28mself\u001b[39m,\n\u001b[32m   1227\u001b[39m     path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[32m   (...)\u001b[39m\u001b[32m   1234\u001b[39m     stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] | \u001b[38;5;28;01mNone\u001b[39;00m = \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[32m   1235\u001b[39m ) -> ResponseT | _StreamT:\n\u001b[32m   1236\u001b[39m     opts = FinalRequestOptions.construct(\n\u001b[32m   1237\u001b[39m         method=\u001b[33m\"\u001b[39m\u001b[33mpost\u001b[39m\u001b[33m\"\u001b[39m, url=path, json_data=body, files=to_httpx_files(files), **options\n\u001b[32m   1238\u001b[39m     )\n\u001b[32m-> \u001b[39m\u001b[32m1239\u001b[39m     \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m=\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n",
-      "\u001b[36mFile \u001b[39m\u001b[32m~/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/openai/_base_client.py:1034\u001b[39m, in \u001b[36mSyncAPIClient.request\u001b[39m\u001b[34m(self, cast_to, options, stream, stream_cls)\u001b[39m\n\u001b[32m   1031\u001b[39m             err.response.read()\n\u001b[32m   1033\u001b[39m         log.debug(\u001b[33m\"\u001b[39m\u001b[33mRe-raising status error\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1034\u001b[39m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m._make_status_error_from_response(err.response) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[32m   1036\u001b[39m     \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[32m   1038\u001b[39m \u001b[38;5;28;01massert\u001b[39;00m response \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[33m\"\u001b[39m\u001b[33mcould not resolve response (should never happen)\u001b[39m\u001b[33m\"\u001b[39m\n",
-      "\u001b[31mNotFoundError\u001b[39m: Error code: 404 - {'error': {'message': 'The model `claude-3-5-sonnet-20241022` does not exist or you do not have access to it.', 'type': 'invalid_request_error', 'param': None, 'code': 'model_not_found'}}"
-     ]
-    }
-   ],
-   "source": [
-    "from litellm import OpenAI\n",
-    "\n",
-    "# Create client that works with any supported model\n",
-    "client = OpenAI()\n",
-    "\n",
-    "# GPT-4 example\n",
-    "response = client.chat.completions.create(\n",
-    "    model=\"gpt-4o\",\n",
-    "    messages=[{\"role\": \"user\", \"content\": \"Write a haiku\"}]\n",
-    ")\n",
-    "\n",
-    "# Claude example (same client!)\n",
-    "response = client.chat.completions.create(\n",
-    "    model=\"claude-3-5-sonnet-20241022\",\n",
-    "    messages=[{\"role\": \"user\", \"content\": \"Write a haiku\"}]\n",
-    ")\n",
-    "\n",
-    "# Gemini example (same client!)\n",
-    "response = client.chat.completions.create(\n",
-    "    model=\"gemini/gemini-1.5-pro\",\n",
-    "    messages=[{\"role\": \"user\", \"content\": \"Write a haiku\"}]\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Function Calling"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "True"
-      ]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "import litellm\n",
-    "\n",
-    "litellm.utils.supports_function_calling(model=ollama_qwen3)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "False"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "litellm.utils.supports_parallel_function_calling(model=gemini25_flash)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "\n",
-    "def get_current_weather(location, unit=\"fahrenheit\"):\n",
-    "    \"\"\"Get the current weather in a given location\"\"\"\n",
-    "    if \"tokyo\" in location.lower():\n",
-    "        return json.dumps({\"location\": \"Tokyo\", \"temperature\": \"10\", \"unit\": \"celsius\"})\n",
-    "    elif \"san francisco\" in location.lower():\n",
-    "        return json.dumps({\"location\": \"San Francisco\", \"temperature\": \"72\", \"unit\": \"fahrenheit\"})\n",
-    "    elif \"paris\" in location.lower():\n",
-    "        return json.dumps({\"location\": \"Paris\", \"temperature\": \"22\", \"unit\": \"celsius\"})\n",
-    "    else:\n",
-    "        return json.dumps({\"location\": location, \"temperature\": \"unknown\"})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def test_parallel_function_call():\n",
-    "    try:\n",
-    "        # Step 1: send the conversation and available functions to the model\n",
-    "        messages = [{\"role\": \"user\", \"content\": \"What's the weather like in San Francisco, Tokyo, and Paris?\"}]\n",
-    "        tools = [\n",
-    "            {\n",
-    "                \"type\": \"function\",\n",
-    "                \"function\": {\n",
-    "                    \"name\": \"get_current_weather\",\n",
-    "                    \"description\": \"Get the current weather in a given location\",\n",
-    "                    \"parameters\": {\n",
-    "                        \"type\": \"object\",\n",
-    "                        \"properties\": {\n",
-    "                            \"location\": {\n",
-    "                                \"type\": \"string\",\n",
-    "                                \"description\": \"The city and state, e.g. San Francisco, CA\",\n",
-    "                            },\n",
-    "                            \"unit\": {\"type\": \"string\", \"enum\": [\"celsius\", \"fahrenheit\"]},\n",
-    "                        },\n",
-    "                        \"required\": [\"location\"],\n",
-    "                    },\n",
-    "                },\n",
-    "            }\n",
-    "        ]\n",
-    "        response = litellm.completion(\n",
-    "            model=\"gpt-3.5-turbo-1106\",\n",
-    "            messages=messages,\n",
-    "            tools=tools,\n",
-    "            tool_choice=\"auto\",  # auto is default, but we'll be explicit\n",
-    "        )\n",
-    "        print(\"\\nFirst LLM Response:\\n\", response)\n",
-    "        response_message = response.choices[0].message\n",
-    "        tool_calls = response_message.tool_calls\n",
-    "\n",
-    "        print(\"\\nLength of tool calls\", len(tool_calls))\n",
-    "\n",
-    "        # Step 2: check if the model wanted to call a function\n",
-    "        if tool_calls:\n",
-    "            # Step 3: call the function\n",
-    "            # Note: the JSON response may not always be valid; be sure to handle errors\n",
-    "            available_functions = {\n",
-    "                \"get_current_weather\": get_current_weather,\n",
-    "            }  # only one function in this example, but you can have multiple\n",
-    "            messages.append(response_message)  # extend conversation with assistant's reply\n",
-    "\n",
-    "            # Step 4: send the info for each function call and function response to the model\n",
-    "            for tool_call in tool_calls:\n",
-    "                function_name = tool_call.function.name\n",
-    "                function_to_call = available_functions[function_name]\n",
-    "                function_args = json.loads(tool_call.function.arguments)\n",
-    "                function_response = function_to_call(\n",
-    "                    location=function_args.get(\"location\"),\n",
-    "                    unit=function_args.get(\"unit\"),\n",
-    "                )\n",
-    "                messages.append(\n",
-    "                    {\n",
-    "                        \"tool_call_id\": tool_call.id,\n",
-    "                        \"role\": \"tool\",\n",
-    "                        \"name\": function_name,\n",
-    "                        \"content\": function_response,\n",
-    "                    }\n",
-    "                )  # extend conversation with function response\n",
-    "            second_response = litellm.completion(\n",
-    "                model=\"gpt-3.5-turbo-1106\",\n",
-    "                messages=messages,\n",
-    "            )  # get a new response from the model where it can see the function response\n",
-    "            print(\"\\nSecond LLM response:\\n\", second_response)\n",
-    "            return second_response\n",
-    "    except Exception as e:\n",
-    "      print(f\"Error occurred: {e}\")\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "test_parallel_function_call()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/explanation/openllmetry.ipynb b/experimental/old_nbs/explanation/openllmetry.ipynb
deleted file mode 100644
index aeea34817..000000000
--- a/experimental/old_nbs/explanation/openllmetry.ipynb
+++ /dev/null
@@ -1,270 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "os.environ[\"TRACELOOP_BASE_URL\"]=\"https://api.smith.langchain.com/otel\"\n",
-    "os.environ[\"TRACELOOP_HEADERS\"]=\"x-api-key=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\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from openai import OpenAI\n",
-    "from traceloop.sdk import Traceloop\n",
-    "\n",
-    "client = OpenAI()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Header format invalid! Header values in environment variables must be URL encoded per the OpenTelemetry Protocol Exporter specification: Authorization=Basic cGstbGYtMTA0Yzg1NTYtYjI3Yy00ZGY1LWI4YmYtYWFhYWQyNzQyNzhjOnNrLWxmLTRjNmFhYjgxLWI2NmQtNDg3ZC04NTc4LTgwZGIxYTZmM2ZkMw==\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[39m\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to export batch code: 401, reason: {\"message\":\"No authorization header\"}\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "ChatCompletionMessage(content='Code calls to itself,  \\nLoops within endless cycles—  \\nDepth reveals design.', refusal=None, role='assistant', annotations=[], audio=None, function_call=None, tool_calls=None)\n"
-     ]
-    }
-   ],
-   "source": [
-    "Traceloop.init()\n",
-    "\n",
-    "completion = client.chat.completions.create(\n",
-    "    model=\"gpt-4o\",\n",
-    "    messages=[\n",
-    "        {\"role\": \"system\", \"content\": \"You are a helpful assistant.\"},\n",
-    "        {\n",
-    "            \"role\": \"user\",\n",
-    "            \"content\": \"Write a haiku about recursion in programming.\"\n",
-    "        }\n",
-    "    ]\n",
-    ")\n",
-    "\n",
-    "print(completion.choices[0].message)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from traceloop.sdk.decorators import workflow, task\n",
-    "\n",
-    "@task(name=\"joke_creation\")\n",
-    "def create_joke():\n",
-    "    completion = client.chat.completions.create(\n",
-    "        model=\"gpt-3.5-turbo\",\n",
-    "        messages=[{\"role\": \"user\", \"content\": \"Tell me a joke about opentelemetry\"}],\n",
-    "    )\n",
-    "\n",
-    "    return completion.choices[0].message.content\n",
-    "\n",
-    "@task(name=\"signature_generation\")\n",
-    "def generate_signature(joke: str):\n",
-    "    completion = client.chat.completions.create(\n",
-    "        model=\"gpt-3.5-turbo\",\n",
-    "        messages=[{\"role\": \"user\", \"content\": \"add a signature to the joke:\\n\\n\" + joke}],\n",
-    "    )\n",
-    "\n",
-    "    return completion.choices[0].message.content\n",
-    "\n",
-    "@task(name=\"translate_joke_to_pirate\")\n",
-    "def translate_joke_to_pirate(joke: str):\n",
-    "    completion = client.chat.completions.create(\n",
-    "        model=\"gpt-3.5-turbo\",\n",
-    "        messages=[{\"role\": \"user\", \"content\": \"Translate the joke to pirate language:\\n\\n\" + joke}],\n",
-    "    )\n",
-    "\n",
-    "    return completion.choices[0].message.content\n",
-    "\n",
-    "\n",
-    "@workflow(name=\"pirate_joke_generator\")\n",
-    "def joke_workflow():\n",
-    "    eng_joke = create_joke()\n",
-    "    pirate_joke = translate_joke_to_pirate(eng_joke)\n",
-    "    signature = generate_signature(pirate_joke)\n",
-    "    print(pirate_joke + \"\\n\\n\" + signature)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Failed to export batch code: 401, reason: {\"message\":\"No authorization header\"}\n",
-      "Failed to export batch code: 401, reason: {\"message\":\"No authorization header\"}\n",
-      "Failed to export batch code: 401, reason: {\"message\":\"No authorization header\"}\n",
-      "Failed to export batch code: 401, reason: {\"message\":\"No authorization header\"}\n",
-      "Failed to export batch code: 401, reason: {\"message\":\"No authorization header\"}\n",
-      "Failed to export batch code: 401, reason: {\"message\":\"No authorization header\"}\n",
-      "Failed to export batch code: 401, reason: {\"message\":\"No authorization header\"}\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Why did the Opentelemetry developer go broke? Because they couldn't trace where all their doubloons went!\n",
-      "\n",
-      "- Captain OpenTrace\n"
-     ]
-    }
-   ],
-   "source": [
-    "joke_workflow()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import base64\n",
-    "import urllib.parse\n",
-    "\n",
-    "LANGFUSE_PUBLIC_KEY=\"pk-lf-104c8556-b27c-4df5-b8bf-aaaad274278c\"\n",
-    "LANGFUSE_SECRET_KEY=\"sk-lf-4c6aab81-b66d-487d-8578-80db1a6f3fd3\"\n",
-    "LANGFUSE_AUTH=base64.b64encode(f\"{LANGFUSE_PUBLIC_KEY}:{LANGFUSE_SECRET_KEY}\".encode()).decode()\n",
-    "\n",
-    "os.environ[\"TRACELOOP_BASE_URL\"] = \"https://us.cloud.langfuse.com/api/public/otel\" # US data region\n",
-    "os.environ[\"TRACELOOP_HEADERS\"] = f\"Authorization={urllib.parse.quote('Basic ' + LANGFUSE_AUTH)}\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os\n",
-    "import base64\n",
-    "from traceloop.sdk import Traceloop\n",
-    "\n",
-    "LANGFUSE_PUBLIC_KEY=\"pk-lf-104c8556-b27c-4df5-b8bf-aaaad274278c\"\n",
-    "LANGFUSE_SECRET_KEY=\"sk-lf-4c6aab81-b66d-487d-8578-80db1a6f3fd3\"\n",
-    "LANGFUSE_AUTH=base64.b64encode(f\"{LANGFUSE_PUBLIC_KEY}:{LANGFUSE_SECRET_KEY}\".encode()).decode()\n",
-    "\n",
-    "os.environ[\"TRACELOOP_BASE_URL\"] = \"https://us.cloud.langfuse.com/api/public/otel\" # US data region\n",
-    "os.environ[\"TRACELOOP_HEADERS\"] = f\"Authorization=Basic {LANGFUSE_AUTH}\""
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'cGstbGYtMTA0Yzg1NTYtYjI3Yy00ZGY1LWI4YmYtYWFhYWQyNzQyNzhjOnNrLWxmLTRjNmFhYjgxLWI2NmQtNDg3ZC04NTc4LTgwZGIxYTZmM2ZkMw=='"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "LANGFUSE_AUTH"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Header format invalid! Header values in environment variables must be URL encoded per the OpenTelemetry Protocol Exporter specification: Authorization=Basic cGstbGYtMTA0Yzg1NTYtYjI3Yy00ZGY1LWI4YmYtYWFhYWQyNzQyNzhjOnNrLWxmLTRjNmFhYjgxLWI2NmQtNDg3ZC04NTc4LTgwZGIxYTZmM2ZkMw==\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\u001b[39m\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jjmachan/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "Traceloop.init(disable_batch=True)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/experimental/old_nbs/getting_started.ipynb b/experimental/old_nbs/getting_started.ipynb
deleted file mode 100644
index 49f79781c..000000000
--- a/experimental/old_nbs/getting_started.ipynb
+++ /dev/null
@@ -1,246 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "59afff51",
-   "metadata": {},
-   "source": [
-    "# Getting Started\n",
-    "\n",
-    "> An introduction to using the experimental features of Ragas"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "2f61bb1c",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/Users/jjmachan/workspace/eglabs/ragas/.venv/lib/python3.12/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "from ragas_experimental.utils import get_test_directory"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "7f07237a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental import Project"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "8ed0453c",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'/var/folders/2y/02fp70k56p75ldrkgtx7z10r0000gn/T/ragas_test_dWkFywl8UuaC'"
-      ]
-     },
-     "execution_count": 3,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "root_dir = get_test_directory()\n",
-    "root_dir"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "90ef5613",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<ragas_experimental.project.core.Project at 0x13021e4b0>"
-      ]
-     },
-     "execution_count": 4,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "p = Project(\n",
-    "    project_id=\"test\",\n",
-    "    backend=\"local\",\n",
-    "    root_dir=root_dir,\n",
-    ")\n",
-    "\n",
-    "p"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "12eeeebb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental import BaseModel\n",
-    "import typing as t\n",
-    "\n",
-    "class TestDataRow(BaseModel):\n",
-    "    id: t.Optional[int]\n",
-    "    query: str\n",
-    "    persona: t.List[t.Literal[\"opt1\", \"opt2\", \"opt3\"]]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "94c7339d",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Dataset(name='test_dataset', model=TestDataRow, len=0)"
-      ]
-     },
-     "execution_count": 6,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "dataset = p.create_dataset(\n",
-    "    name=\"test_dataset\",\n",
-    "    model=TestDataRow,\n",
-    ")\n",
-    "\n",
-    "dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "7b039a42",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Dataset(name='test_dataset', model=TestDataRow, len=3)"
-      ]
-     },
-     "execution_count": 7,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "for i in range(3):\n",
-    "    row = TestDataRow(id=i, query=f\"query_{i}\", persona=[\"opt1\"])\n",
-    "    dataset.append(row)\n",
-    "\n",
-    "dataset"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "9261ba72",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from ragas_experimental.metric import MetricResult\n",
-    "\n",
-    "class ExperimentDataRow(TestDataRow):\n",
-    "    response: str \n",
-    "    metrics: t.List[MetricResult]\n",
-    "\n",
-    "\n",
-    "@p.experiment(ExperimentDataRow)\n",
-    "async def run_experiment(row: TestDataRow):\n",
-    "    response = \"test\"\n",
-    "    score1 = MetricResult(result=1, reason=\"score 1\")\n",
-    "    score2 = MetricResult(result=0, reason=\"score 2\")\n",
-    "\n",
-    "    experiment_view = ExperimentDataRow(\n",
-    "        id=row.id,\n",
-    "        query=row.query,\n",
-    "        persona=[\"opt1\"],\n",
-    "        response=response,\n",
-    "        metrics=[score1, score2],\n",
-    "    )\n",
-    "    return experiment_view"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "5baf8011",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Running experiment: 100%|██████████| 6/6 [00:00<00:00, 6424.77it/s]\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "Experiment(name=Workshop-stoic_milner, model=ExperimentDataRow, len=3)"
-      ]
-     },
-     "execution_count": 9,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "await run_experiment.run_async(dataset)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6f4aae99",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": ".venv",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.8"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/experimental/old_nbs/how-to/index.ipynb b/experimental/old_nbs/how-to/index.ipynb
deleted file mode 100644
index 4e373dd35..000000000
--- a/experimental/old_nbs/how-to/index.ipynb
+++ /dev/null
@@ -1,51 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# How-To Guides\n",
-    "\n",
-    "> Problem-oriented guides for Ragas Experimental"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "source": [
-    "#| hide\n",
-    "from ragas_experimental import *"
-   ],
-   "outputs": []
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Welcome to How-To Guides\n",
-    "\n",
-    "How-To Guides are problem-oriented materials designed to help you accomplish specific tasks with Ragas Experimental.\n",
-    "\n",
-    "Each guide addresses a specific use case or problem, providing clear instructions and code examples to implement solutions.\n",
-    "\n",
-    "### Available How-To Guides\n",
-    "\n",
-    "- Working with Datasets\n",
-    "- Setting Up Experiments\n",
-    "- Tracing with Langfuse\n",
-    "- Using Backend Services\n",
-    "- Implementing Dynamic Few-Shot Learning"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
\ No newline at end of file
diff --git a/experimental/old_nbs/nbdev.yml b/experimental/old_nbs/nbdev.yml
deleted file mode 100644
index 223a6255e..000000000
--- a/experimental/old_nbs/nbdev.yml
+++ /dev/null
@@ -1,9 +0,0 @@
-project:
-  output-dir: experimental
-
-website:
-  title: "ragas_experimental"
-  site-url: "https://explodinggradients.github.io/ragas_experimental"
-  description: "Experimental Ragas Evaluation UI and Library"
-  repo-branch: main
-  repo-url: "https://github.com/explodinggradients/ragas_experimental"
diff --git a/experimental/old_nbs/sidebar.yml b/experimental/old_nbs/sidebar.yml
deleted file mode 100644
index f68e02d17..000000000
--- a/experimental/old_nbs/sidebar.yml
+++ /dev/null
@@ -1,69 +0,0 @@
-website:
-  sidebar:
-    contents:
-      - index.ipynb
-      - getting_started.ipynb
-      
-      - section: "Tutorials"
-        contents:
-          - tutorials/index.ipynb
-      
-      - section: "How-to Guides"
-        contents:
-          - how-to/index.ipynb
-      
-      - section: "Core Concepts"
-        contents:
-          - explanation/index.ipynb
-          
-      - section: "References"
-        contents:
-          - api/index.ipynb
-          - api/dataset.ipynb
-          - api/experiment.ipynb
-          - api/typing.ipynb
-          - api/utils.ipynb
-          - api/exceptions.ipynb
-          
-          - section: "project"
-            contents:
-              - api/project/core.ipynb
-              - api/project/datasets.ipynb
-              - api/project/experiments.ipynb
-              - api/project/naming.ipynb
-          
-          - section: "metric"
-            contents:
-              - api/metric/base.ipynb
-              - api/metric/decorator.ipynb
-              - api/metric/discrete.ipynb
-              - api/metric/numeric.ipynb
-              - api/metric/ranking.ipynb
-              - api/metric/result.ipynb
-          
-          - section: "llm"
-            contents:
-              - api/llm/llm.ipynb
-          
-          - section: "embedding"
-            contents:
-              - api/embedding/base.ipynb
-              
-          - section: "prompt"
-            contents:
-              - api/prompt/base.ipynb
-              - api/prompt/dynamic_few_shot.ipynb
-          
-          - section: "backends"
-            contents:
-              - api/backends/factory.ipynb
-              - api/backends/ragas_api_client.ipynb
-              
-          - section: "tracing"
-            contents:
-              - api/tracing/langfuse.ipynb
-              - api/tracing/mlflow.ipynb
-              
-          - section: "model"
-            contents:
-              - api/model/pydantic_mode.ipynb
\ No newline at end of file
diff --git a/experimental/old_nbs/styles.css b/experimental/old_nbs/styles.css
deleted file mode 100644
index 66ccc49ee..000000000
--- a/experimental/old_nbs/styles.css
+++ /dev/null
@@ -1,37 +0,0 @@
-.cell {
-  margin-bottom: 1rem;
-}
-
-.cell > .sourceCode {
-  margin-bottom: 0;
-}
-
-.cell-output > pre {
-  margin-bottom: 0;
-}
-
-.cell-output > pre, .cell-output > .sourceCode > pre, .cell-output-stdout > pre {
-  margin-left: 0.8rem;
-  margin-top: 0;
-  background: none;
-  border-left: 2px solid lightsalmon;
-  border-top-left-radius: 0;
-  border-top-right-radius: 0;
-}
-
-.cell-output > .sourceCode {
-  border: none;
-}
-
-.cell-output > .sourceCode {
-  background: none;
-  margin-top: 0;
-}
-
-div.description {
-  padding-left: 2px;
-  padding-top: 5px;
-  font-style: italic;
-  font-size: 135%;
-  opacity: 70%;
-}
diff --git a/experimental/old_nbs/tutorials/index.ipynb b/experimental/old_nbs/tutorials/index.ipynb
deleted file mode 100644
index 7fe9ad742..000000000
--- a/experimental/old_nbs/tutorials/index.ipynb
+++ /dev/null
@@ -1,32 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# Tutorials\n",
-    "\n",
-    "> Learning-oriented guides for Ragas Experimental"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#| hide\n",
-    "from ragas_experimental import *"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/experimental/pyproject.toml b/experimental/pyproject.toml
index dc77a5991..503eb1ae7 100644
--- a/experimental/pyproject.toml
+++ b/experimental/pyproject.toml
@@ -1,13 +1,51 @@
 [build-system]
-requires = ["setuptools>=64", "setuptools_scm>=8", "nbdev>=2.3.35"]
+requires = ["setuptools>=64", "setuptools_scm>=8"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name="ragas_experimental"
+name = "ragas_experimental"
 description = "Experimental extensions for Ragas"
-requires-python=">=3.9"
-# List all fields that might be defined outside pyproject.toml as dynamic
-dynamic = ["version", "readme", "license", "authors", "dependencies", "optional-dependencies"]
+requires-python = ">=3.9"
+authors = [
+    {name = "jjmachan", email = "jamesjithin97@gmail.com"}
+]
+license = {text = "Apache-2.0"}
+keywords = ["jupyter", "notebook", "python", "evaluation", "llm", "ragas"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.9",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+]
+dependencies = [
+    "fastcore",
+    "tqdm", 
+    "langfuse",
+    "instructor",
+    "pydantic",
+    "numpy",
+    "plotly",
+    "mlflow",
+    "gitpython",
+    "httpx",
+]
+# Only version remains dynamic (managed by setuptools_scm)
+dynamic = ["version"]
+readme = "README.md"
+
+[project.optional-dependencies]
+all = ["pandas"]
+
+[project.entry-points."ragas.backends"]
+local_csv = "ragas_experimental.project.backends.local_csv:LocalCSVProjectBackend"
+platform = "ragas_experimental.project.backends.platform:PlatformProjectBackend"
+
+[tool.setuptools.packages.find]
+include = ["ragas_experimental*"]
+exclude = ["site*", "old_nbs*", "experiments*", "_proc*", "build*", "dist*"]
 
 [tool.setuptools_scm]
 root = ".."  # Points to monorepo root, one directory up
@@ -15,5 +53,19 @@ version_file = "ragas_experimental/_version.py"  # Creates a version file
 
 [dependency-groups]
 dev = [
-    "nbdev>=2.3.35",
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0", 
+    "pytest-mock>=3.10.0",
+    "black",
+    "ruff",
+]
+test = []
+
+[tool.pytest.ini_options]
+asyncio_default_fixture_loop_scope = "function"
+markers = [
+    "experimental_ci: Set of tests that will be run as part of Experimental CI",
+    "e2e: End-to-End tests for Experimental",
 ]
+
+# Ruff configuration is inherited from workspace.toml at the monorepo root
diff --git a/experimental/ragas_experimental/__init__.py b/experimental/ragas_experimental/__init__.py
index 5a13f34c8..455562211 100644
--- a/experimental/ragas_experimental/__init__.py
+++ b/experimental/ragas_experimental/__init__.py
@@ -1,33 +1,24 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/api/init_module.ipynb.
-
-# %% auto 0
-__all__ = []
-
-# %% ../nbs/api/init_module.ipynb 1
 # Get version from setuptools_scm-generated file
 try:
     from ._version import version as __version__
 except ImportError:
     # Fallback for installed package
-    from importlib.metadata import version as pkg_version, PackageNotFoundError
+    from importlib.metadata import PackageNotFoundError
+    from importlib.metadata import version as pkg_version
 
     try:
         __version__ = pkg_version("ragas_experimental")
     except PackageNotFoundError:
         __version__ = "unknown"
 
-# %% ../nbs/api/init_module.ipynb 3
-from .project.core import Project
 import ragas_experimental.model.notion_typing as nmt
-from .model.notion_model import NotionModel
 from ragas_experimental.model.pydantic_model import (
     ExtendedPydanticBaseModel as BaseModel,
 )
 
-# just import to run the module
-import ragas_experimental.project.datasets
-import ragas_experimental.project.experiments
-import ragas_experimental.project.comparison
+from .model.notion_model import NotionModel
+from .project.core import Project
+
+# Import the main Project class - decorators are added automatically in core.py
 
-# %% ../nbs/api/init_module.ipynb 4
 __all__ = ["Project", "NotionModel", "nmt", "BaseModel"]
diff --git a/experimental/ragas_experimental/_modidx.py b/experimental/ragas_experimental/_modidx.py
deleted file mode 100644
index 7a193b301..000000000
--- a/experimental/ragas_experimental/_modidx.py
+++ /dev/null
@@ -1,840 +0,0 @@
-# Autogenerated by nbdev
-
-d = { 'settings': { 'branch': 'main',
-                'doc_baseurl': '/ragas_experimental',
-                'doc_host': 'https://explodinggradients.github.io',
-                'git_url': 'https://github.com/explodinggradients/ragas_experimental',
-                'lib_path': 'ragas_experimental'},
-  'syms': { 'ragas_experimental.backends.factory': { 'ragas_experimental.backends.factory.RagasApiClientFactory': ( 'api/backends/factory.html#ragasapiclientfactory',
-                                                                                                                    'ragas_experimental/backends/factory.py'),
-                                                     'ragas_experimental.backends.factory.RagasApiClientFactory.create': ( 'api/backends/factory.html#ragasapiclientfactory.create',
-                                                                                                                           'ragas_experimental/backends/factory.py')},
-            'ragas_experimental.backends.mock_notion': { 'ragas_experimental.backends.mock_notion.MockBlockChildrenAPI': ( 'backends/mock_notion_client.html#mockblockchildrenapi',
-                                                                                                                           'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockBlockChildrenAPI.__init__': ( 'backends/mock_notion_client.html#mockblockchildrenapi.__init__',
-                                                                                                                                    'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockBlockChildrenAPI.list': ( 'backends/mock_notion_client.html#mockblockchildrenapi.list',
-                                                                                                                                'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockBlocksAPI': ( 'backends/mock_notion_client.html#mockblocksapi',
-                                                                                                                    'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockBlocksAPI.__init__': ( 'backends/mock_notion_client.html#mockblocksapi.__init__',
-                                                                                                                             'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockBlocksAPI.retrieve': ( 'backends/mock_notion_client.html#mockblocksapi.retrieve',
-                                                                                                                             'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockDatabasesAPI': ( 'backends/mock_notion_client.html#mockdatabasesapi',
-                                                                                                                       'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockDatabasesAPI.__init__': ( 'backends/mock_notion_client.html#mockdatabasesapi.__init__',
-                                                                                                                                'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockDatabasesAPI._extract_title': ( 'backends/mock_notion_client.html#mockdatabasesapi._extract_title',
-                                                                                                                                      'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockDatabasesAPI.create': ( 'backends/mock_notion_client.html#mockdatabasesapi.create',
-                                                                                                                              'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockDatabasesAPI.query': ( 'backends/mock_notion_client.html#mockdatabasesapi.query',
-                                                                                                                             'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockDatabasesAPI.retrieve': ( 'backends/mock_notion_client.html#mockdatabasesapi.retrieve',
-                                                                                                                                'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient': ( 'backends/mock_notion_client.html#mocknotionclient',
-                                                                                                                       'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient.__init__': ( 'backends/mock_notion_client.html#mocknotionclient.__init__',
-                                                                                                                                'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient.__str__': ( 'backends/mock_notion_client.html#mocknotionclient.__str__',
-                                                                                                                               'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient._create_id': ( 'backends/mock_notion_client.html#mocknotionclient._create_id',
-                                                                                                                                  'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient._get_timestamp': ( 'backends/mock_notion_client.html#mocknotionclient._get_timestamp',
-                                                                                                                                      'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient.add_block': ( 'backends/mock_notion_client.html#mocknotionclient.add_block',
-                                                                                                                                 'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient.add_children': ( 'backends/mock_notion_client.html#mocknotionclient.add_children',
-                                                                                                                                    'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient.add_database': ( 'backends/mock_notion_client.html#mocknotionclient.add_database',
-                                                                                                                                    'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockNotionClient.add_page': ( 'backends/mock_notion_client.html#mocknotionclient.add_page',
-                                                                                                                                'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockPagesAPI': ( 'backends/mock_notion_client.html#mockpagesapi',
-                                                                                                                   'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockPagesAPI.__init__': ( 'backends/mock_notion_client.html#mockpagesapi.__init__',
-                                                                                                                            'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockPagesAPI._extract_title': ( 'backends/mock_notion_client.html#mockpagesapi._extract_title',
-                                                                                                                                  'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockPagesAPI.create': ( 'backends/mock_notion_client.html#mockpagesapi.create',
-                                                                                                                          'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockPagesAPI.retrieve': ( 'backends/mock_notion_client.html#mockpagesapi.retrieve',
-                                                                                                                            'ragas_experimental/backends/mock_notion.py'),
-                                                         'ragas_experimental.backends.mock_notion.MockPagesAPI.update': ( 'backends/mock_notion_client.html#mockpagesapi.update',
-                                                                                                                          'ragas_experimental/backends/mock_notion.py')},
-            'ragas_experimental.backends.notion_backend': { 'ragas_experimental.backends.notion_backend.NotionBackend': ( 'backends/notion.html#notionbackend',
-                                                                                                                          'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.__init__': ( 'backends/notion.html#notionbackend.__init__',
-                                                                                                                                   'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.__repr__': ( 'backends/notion.html#notionbackend.__repr__',
-                                                                                                                                   'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.create_new_database': ( 'backends/notion.html#notionbackend.create_new_database',
-                                                                                                                                              'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.create_new_page': ( 'backends/notion.html#notionbackend.create_new_page',
-                                                                                                                                          'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.create_page_in_database': ( 'backends/notion.html#notionbackend.create_page_in_database',
-                                                                                                                                                  'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.get_database': ( 'backends/notion.html#notionbackend.get_database',
-                                                                                                                                       'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.get_database_id': ( 'backends/notion.html#notionbackend.get_database_id',
-                                                                                                                                          'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.get_page_id': ( 'backends/notion.html#notionbackend.get_page_id',
-                                                                                                                                      'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.page_exists': ( 'backends/notion.html#notionbackend.page_exists',
-                                                                                                                                      'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.query_database': ( 'backends/notion.html#notionbackend.query_database',
-                                                                                                                                         'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.update_page': ( 'backends/notion.html#notionbackend.update_page',
-                                                                                                                                      'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.NotionBackend.validate_project_structure': ( 'backends/notion.html#notionbackend.validate_project_structure',
-                                                                                                                                                     'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.get_database_id': ( 'backends/notion.html#get_database_id',
-                                                                                                                            'ragas_experimental/backends/notion_backend.py'),
-                                                            'ragas_experimental.backends.notion_backend.get_page_id': ( 'backends/notion.html#get_page_id',
-                                                                                                                        'ragas_experimental/backends/notion_backend.py')},
-            'ragas_experimental.backends.ragas_api_client': { 'ragas_experimental.backends.ragas_api_client.Column': ( 'api/backends/ragas_api_client.html#column',
-                                                                                                                       'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient': ( 'api/backends/ragas_api_client.html#ragasapiclient',
-                                                                                                                               'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.__init__': ( 'api/backends/ragas_api_client.html#ragasapiclient.__init__',
-                                                                                                                                        'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient._create_resource': ( 'api/backends/ragas_api_client.html#ragasapiclient._create_resource',
-                                                                                                                                                'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient._create_with_data': ( 'api/backends/ragas_api_client.html#ragasapiclient._create_with_data',
-                                                                                                                                                 'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient._delete_resource': ( 'api/backends/ragas_api_client.html#ragasapiclient._delete_resource',
-                                                                                                                                                'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient._get_resource': ( 'api/backends/ragas_api_client.html#ragasapiclient._get_resource',
-                                                                                                                                             'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient._get_resource_by_name': ( 'api/backends/ragas_api_client.html#ragasapiclient._get_resource_by_name',
-                                                                                                                                                     'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient._list_resources': ( 'api/backends/ragas_api_client.html#ragasapiclient._list_resources',
-                                                                                                                                               'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient._request': ( 'api/backends/ragas_api_client.html#ragasapiclient._request',
-                                                                                                                                        'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient._update_resource': ( 'api/backends/ragas_api_client.html#ragasapiclient._update_resource',
-                                                                                                                                                'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.convert_raw_data': ( 'api/backends/ragas_api_client.html#ragasapiclient.convert_raw_data',
-                                                                                                                                                'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_column',
-                                                                                                                                             'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_column_map': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_column_map',
-                                                                                                                                                 'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_dataset': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_dataset',
-                                                                                                                                              'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_dataset_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_dataset_column',
-                                                                                                                                                     'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_dataset_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_dataset_row',
-                                                                                                                                                  'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_dataset_with_data': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_dataset_with_data',
-                                                                                                                                                        'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_experiment': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_experiment',
-                                                                                                                                                 'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_experiment_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_experiment_column',
-                                                                                                                                                        'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_experiment_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_experiment_row',
-                                                                                                                                                     'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_experiment_with_data': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_experiment_with_data',
-                                                                                                                                                           'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_project': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_project',
-                                                                                                                                              'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.create_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.create_row',
-                                                                                                                                          'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.delete_dataset': ( 'api/backends/ragas_api_client.html#ragasapiclient.delete_dataset',
-                                                                                                                                              'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.delete_dataset_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.delete_dataset_column',
-                                                                                                                                                     'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.delete_dataset_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.delete_dataset_row',
-                                                                                                                                                  'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.delete_experiment': ( 'api/backends/ragas_api_client.html#ragasapiclient.delete_experiment',
-                                                                                                                                                 'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.delete_experiment_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.delete_experiment_column',
-                                                                                                                                                        'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.delete_experiment_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.delete_experiment_row',
-                                                                                                                                                     'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.delete_project': ( 'api/backends/ragas_api_client.html#ragasapiclient.delete_project',
-                                                                                                                                              'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_dataset': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_dataset',
-                                                                                                                                           'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_dataset_by_name': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_dataset_by_name',
-                                                                                                                                                   'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_dataset_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_dataset_column',
-                                                                                                                                                  'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_dataset_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_dataset_row',
-                                                                                                                                               'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_experiment': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_experiment',
-                                                                                                                                              'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_experiment_by_name': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_experiment_by_name',
-                                                                                                                                                      'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_experiment_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_experiment_column',
-                                                                                                                                                     'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_experiment_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_experiment_row',
-                                                                                                                                                  'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_project': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_project',
-                                                                                                                                           'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.get_project_by_name': ( 'api/backends/ragas_api_client.html#ragasapiclient.get_project_by_name',
-                                                                                                                                                   'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.list_dataset_columns': ( 'api/backends/ragas_api_client.html#ragasapiclient.list_dataset_columns',
-                                                                                                                                                    'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.list_dataset_rows': ( 'api/backends/ragas_api_client.html#ragasapiclient.list_dataset_rows',
-                                                                                                                                                 'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.list_datasets': ( 'api/backends/ragas_api_client.html#ragasapiclient.list_datasets',
-                                                                                                                                             'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.list_experiment_columns': ( 'api/backends/ragas_api_client.html#ragasapiclient.list_experiment_columns',
-                                                                                                                                                       'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.list_experiment_rows': ( 'api/backends/ragas_api_client.html#ragasapiclient.list_experiment_rows',
-                                                                                                                                                    'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.list_experiments': ( 'api/backends/ragas_api_client.html#ragasapiclient.list_experiments',
-                                                                                                                                                'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.list_projects': ( 'api/backends/ragas_api_client.html#ragasapiclient.list_projects',
-                                                                                                                                             'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.update_dataset': ( 'api/backends/ragas_api_client.html#ragasapiclient.update_dataset',
-                                                                                                                                              'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.update_dataset_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.update_dataset_column',
-                                                                                                                                                     'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.update_dataset_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.update_dataset_row',
-                                                                                                                                                  'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.update_experiment': ( 'api/backends/ragas_api_client.html#ragasapiclient.update_experiment',
-                                                                                                                                                 'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.update_experiment_column': ( 'api/backends/ragas_api_client.html#ragasapiclient.update_experiment_column',
-                                                                                                                                                        'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.update_experiment_row': ( 'api/backends/ragas_api_client.html#ragasapiclient.update_experiment_row',
-                                                                                                                                                     'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RagasApiClient.update_project': ( 'api/backends/ragas_api_client.html#ragasapiclient.update_project',
-                                                                                                                                              'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.Row': ( 'api/backends/ragas_api_client.html#row',
-                                                                                                                    'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.RowCell': ( 'api/backends/ragas_api_client.html#rowcell',
-                                                                                                                        'ragas_experimental/backends/ragas_api_client.py'),
-                                                              'ragas_experimental.backends.ragas_api_client.create_nano_id': ( 'api/backends/ragas_api_client.html#create_nano_id',
-                                                                                                                               'ragas_experimental/backends/ragas_api_client.py')},
-            'ragas_experimental.core': {'ragas_experimental.core.foo': ('core.html#foo', 'ragas_experimental/core.py')},
-            'ragas_experimental.dataset': { 'ragas_experimental.dataset.Dataset': ( 'api/dataset.html#dataset',
-                                                                                    'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.__getitem__': ( 'api/dataset.html#dataset.__getitem__',
-                                                                                                'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.__init__': ( 'api/dataset.html#dataset.__init__',
-                                                                                             'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.__iter__': ( 'api/dataset.html#dataset.__iter__',
-                                                                                             'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.__len__': ( 'api/dataset.html#dataset.__len__',
-                                                                                            'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.__repr__': ( 'api/dataset.html#dataset.__repr__',
-                                                                                             'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.__setitem__': ( 'api/dataset.html#dataset.__setitem__',
-                                                                                                'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset._update_local_entry': ( 'api/dataset.html#dataset._update_local_entry',
-                                                                                                        'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.append': ( 'api/dataset.html#dataset.append',
-                                                                                           'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.get': ( 'api/dataset.html#dataset.get',
-                                                                                        'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.load': ( 'api/dataset.html#dataset.load',
-                                                                                         'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.load_as_dicts': ( 'api/dataset.html#dataset.load_as_dicts',
-                                                                                                  'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.pop': ( 'api/dataset.html#dataset.pop',
-                                                                                        'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.save': ( 'api/dataset.html#dataset.save',
-                                                                                         'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.Dataset.to_pandas': ( 'api/dataset.html#dataset.to_pandas',
-                                                                                              'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.DatasetBackend': ( 'api/dataset.html#datasetbackend',
-                                                                                           'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.DatasetBackend.append_entry': ( 'api/dataset.html#datasetbackend.append_entry',
-                                                                                                        'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.DatasetBackend.delete_entry': ( 'api/dataset.html#datasetbackend.delete_entry',
-                                                                                                        'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.DatasetBackend.get_column_mapping': ( 'api/dataset.html#datasetbackend.get_column_mapping',
-                                                                                                              'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.DatasetBackend.get_entry_by_field': ( 'api/dataset.html#datasetbackend.get_entry_by_field',
-                                                                                                              'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.DatasetBackend.initialize': ( 'api/dataset.html#datasetbackend.initialize',
-                                                                                                      'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.DatasetBackend.load_entries': ( 'api/dataset.html#datasetbackend.load_entries',
-                                                                                                        'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.DatasetBackend.update_entry': ( 'api/dataset.html#datasetbackend.update_entry',
-                                                                                                        'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend': ( 'api/dataset.html#localbackend',
-                                                                                         'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.__init__': ( 'api/dataset.html#localbackend.__init__',
-                                                                                                  'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.__repr__': ( 'api/dataset.html#localbackend.__repr__',
-                                                                                                  'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.__str__': ( 'api/dataset.html#localbackend.__str__',
-                                                                                                 'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend._ensure_csv_exists': ( 'api/dataset.html#localbackend._ensure_csv_exists',
-                                                                                                            'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend._get_csv_path': ( 'api/dataset.html#localbackend._get_csv_path',
-                                                                                                       'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend._write_entries_to_csv': ( 'api/dataset.html#localbackend._write_entries_to_csv',
-                                                                                                               'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.append_entry': ( 'api/dataset.html#localbackend.append_entry',
-                                                                                                      'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.delete_entry': ( 'api/dataset.html#localbackend.delete_entry',
-                                                                                                      'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.get_column_mapping': ( 'api/dataset.html#localbackend.get_column_mapping',
-                                                                                                            'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.get_entry_by_field': ( 'api/dataset.html#localbackend.get_entry_by_field',
-                                                                                                            'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.initialize': ( 'api/dataset.html#localbackend.initialize',
-                                                                                                    'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.load_entries': ( 'api/dataset.html#localbackend.load_entries',
-                                                                                                      'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.LocalBackend.update_entry': ( 'api/dataset.html#localbackend.update_entry',
-                                                                                                      'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend': ( 'api/dataset.html#ragasappbackend',
-                                                                                            'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.__init__': ( 'api/dataset.html#ragasappbackend.__init__',
-                                                                                                     'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.__repr__': ( 'api/dataset.html#ragasappbackend.__repr__',
-                                                                                                     'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.__str__': ( 'api/dataset.html#ragasappbackend.__str__',
-                                                                                                    'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.append_entry': ( 'api/dataset.html#ragasappbackend.append_entry',
-                                                                                                         'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.delete_entry': ( 'api/dataset.html#ragasappbackend.delete_entry',
-                                                                                                         'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.get_column_mapping': ( 'api/dataset.html#ragasappbackend.get_column_mapping',
-                                                                                                               'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.get_entry_by_field': ( 'api/dataset.html#ragasappbackend.get_entry_by_field',
-                                                                                                               'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.initialize': ( 'api/dataset.html#ragasappbackend.initialize',
-                                                                                                       'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.load_entries': ( 'api/dataset.html#ragasappbackend.load_entries',
-                                                                                                         'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.RagasAppBackend.update_entry': ( 'api/dataset.html#ragasappbackend.update_entry',
-                                                                                                         'ragas_experimental/dataset.py'),
-                                            'ragas_experimental.dataset.create_dataset_backend': ( 'api/dataset.html#create_dataset_backend',
-                                                                                                   'ragas_experimental/dataset.py')},
-            'ragas_experimental.embedding.base': { 'ragas_experimental.embedding.base.BaseEmbedding': ( 'api/embedding/base.html#baseembedding',
-                                                                                                        'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.BaseEmbedding.aembed_document': ( 'api/embedding/base.html#baseembedding.aembed_document',
-                                                                                                                        'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.BaseEmbedding.aembed_text': ( 'api/embedding/base.html#baseembedding.aembed_text',
-                                                                                                                    'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.BaseEmbedding.embed_document': ( 'api/embedding/base.html#baseembedding.embed_document',
-                                                                                                                       'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.BaseEmbedding.embed_text': ( 'api/embedding/base.html#baseembedding.embed_text',
-                                                                                                                   'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.OpenAIEmbeddings': ( 'api/embedding/base.html#openaiembeddings',
-                                                                                                           'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.OpenAIEmbeddings.__init__': ( 'api/embedding/base.html#openaiembeddings.__init__',
-                                                                                                                    'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.OpenAIEmbeddings.aembed_document': ( 'api/embedding/base.html#openaiembeddings.aembed_document',
-                                                                                                                           'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.OpenAIEmbeddings.aembed_text': ( 'api/embedding/base.html#openaiembeddings.aembed_text',
-                                                                                                                       'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.OpenAIEmbeddings.embed_document': ( 'api/embedding/base.html#openaiembeddings.embed_document',
-                                                                                                                          'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.OpenAIEmbeddings.embed_text': ( 'api/embedding/base.html#openaiembeddings.embed_text',
-                                                                                                                      'ragas_experimental/embedding/base.py'),
-                                                   'ragas_experimental.embedding.base.ragas_embedding': ( 'api/embedding/base.html#ragas_embedding',
-                                                                                                          'ragas_experimental/embedding/base.py')},
-            'ragas_experimental.exceptions': { 'ragas_experimental.exceptions.DatasetNotFoundError': ( 'api/exceptions.html#datasetnotfounderror',
-                                                                                                       'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.DuplicateDatasetError': ( 'api/exceptions.html#duplicatedataseterror',
-                                                                                                        'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.DuplicateError': ( 'api/exceptions.html#duplicateerror',
-                                                                                                 'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.DuplicateExperimentError': ( 'api/exceptions.html#duplicateexperimenterror',
-                                                                                                           'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.DuplicateProjectError': ( 'api/exceptions.html#duplicateprojecterror',
-                                                                                                        'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.DuplicateResourceError': ( 'api/exceptions.html#duplicateresourceerror',
-                                                                                                         'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.ExperimentNotFoundError': ( 'api/exceptions.html#experimentnotfounderror',
-                                                                                                          'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.NotFoundError': ( 'api/exceptions.html#notfounderror',
-                                                                                                'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.ProjectNotFoundError': ( 'api/exceptions.html#projectnotfounderror',
-                                                                                                       'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.RagasError': ( 'api/exceptions.html#ragaserror',
-                                                                                             'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.ResourceNotFoundError': ( 'api/exceptions.html#resourcenotfounderror',
-                                                                                                        'ragas_experimental/exceptions.py'),
-                                               'ragas_experimental.exceptions.ValidationError': ( 'api/exceptions.html#validationerror',
-                                                                                                  'ragas_experimental/exceptions.py')},
-            'ragas_experimental.experiment': { 'ragas_experimental.experiment.Experiment': ( 'api/experiment.html#experiment',
-                                                                                             'ragas_experimental/experiment.py'),
-                                               'ragas_experimental.experiment.Experiment.__init__': ( 'api/experiment.html#experiment.__init__',
-                                                                                                      'ragas_experimental/experiment.py'),
-                                               'ragas_experimental.experiment.Experiment.__str__': ( 'api/experiment.html#experiment.__str__',
-                                                                                                     'ragas_experimental/experiment.py')},
-            'ragas_experimental.llm.llm': { 'ragas_experimental.llm.llm.RagasLLM': ( 'api/llm/llm.html#ragasllm',
-                                                                                     'ragas_experimental/llm/llm.py'),
-                                            'ragas_experimental.llm.llm.RagasLLM.__init__': ( 'api/llm/llm.html#ragasllm.__init__',
-                                                                                              'ragas_experimental/llm/llm.py'),
-                                            'ragas_experimental.llm.llm.RagasLLM._check_client_async': ( 'api/llm/llm.html#ragasllm._check_client_async',
-                                                                                                         'ragas_experimental/llm/llm.py'),
-                                            'ragas_experimental.llm.llm.RagasLLM._initialize_client': ( 'api/llm/llm.html#ragasllm._initialize_client',
-                                                                                                        'ragas_experimental/llm/llm.py'),
-                                            'ragas_experimental.llm.llm.RagasLLM._run_async_in_current_loop': ( 'api/llm/llm.html#ragasllm._run_async_in_current_loop',
-                                                                                                                'ragas_experimental/llm/llm.py'),
-                                            'ragas_experimental.llm.llm.RagasLLM.agenerate': ( 'api/llm/llm.html#ragasllm.agenerate',
-                                                                                               'ragas_experimental/llm/llm.py'),
-                                            'ragas_experimental.llm.llm.RagasLLM.generate': ( 'api/llm/llm.html#ragasllm.generate',
-                                                                                              'ragas_experimental/llm/llm.py'),
-                                            'ragas_experimental.llm.llm.ragas_llm': ( 'api/llm/llm.html#ragas_llm',
-                                                                                      'ragas_experimental/llm/llm.py')},
-            'ragas_experimental.metric.base': { 'ragas_experimental.metric.base.Metric': ( 'api/metric/base.html#metric',
-                                                                                           'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric.__post_init__': ( 'api/metric/base.html#metric.__post_init__',
-                                                                                                         'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric._ensemble': ( 'api/metric/base.html#metric._ensemble',
-                                                                                                     'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric._get_response_model': ( 'api/metric/base.html#metric._get_response_model',
-                                                                                                               'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric.abatch_score': ( 'api/metric/base.html#metric.abatch_score',
-                                                                                                        'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric.ascore': ( 'api/metric/base.html#metric.ascore',
-                                                                                                  'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric.batch_score': ( 'api/metric/base.html#metric.batch_score',
-                                                                                                       'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric.get_variables': ( 'api/metric/base.html#metric.get_variables',
-                                                                                                         'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric.score': ( 'api/metric/base.html#metric.score',
-                                                                                                 'ragas_experimental/metric/base.py'),
-                                                'ragas_experimental.metric.base.Metric.train': ( 'api/metric/base.html#metric.train',
-                                                                                                 'ragas_experimental/metric/base.py')},
-            'ragas_experimental.metric.decorator': { 'ragas_experimental.metric.decorator.create_metric_decorator': ( 'api/metric/decorator.html#create_metric_decorator',
-                                                                                                                      'ragas_experimental/metric/decorator.py')},
-            'ragas_experimental.metric.discrete': { 'ragas_experimental.metric.discrete.DiscreteMetric': ( 'api/metric/discrete.html#discretemetric',
-                                                                                                           'ragas_experimental/metric/discrete.py'),
-                                                    'ragas_experimental.metric.discrete.DiscreteMetric._ensemble': ( 'api/metric/discrete.html#discretemetric._ensemble',
-                                                                                                                     'ragas_experimental/metric/discrete.py'),
-                                                    'ragas_experimental.metric.discrete.DiscreteMetric._get_response_model': ( 'api/metric/discrete.html#discretemetric._get_response_model',
-                                                                                                                               'ragas_experimental/metric/discrete.py')},
-            'ragas_experimental.metric.numeric': { 'ragas_experimental.metric.numeric.NumericMetric': ( 'api/metric/numeric.html#numericmetric',
-                                                                                                        'ragas_experimental/metric/numeric.py'),
-                                                   'ragas_experimental.metric.numeric.NumericMetric._ensemble': ( 'api/metric/numeric.html#numericmetric._ensemble',
-                                                                                                                  'ragas_experimental/metric/numeric.py'),
-                                                   'ragas_experimental.metric.numeric.NumericMetric._get_response_model': ( 'api/metric/numeric.html#numericmetric._get_response_model',
-                                                                                                                            'ragas_experimental/metric/numeric.py')},
-            'ragas_experimental.metric.ranking': { 'ragas_experimental.metric.ranking.RankingMetric': ( 'api/metric/ranking.html#rankingmetric',
-                                                                                                        'ragas_experimental/metric/ranking.py'),
-                                                   'ragas_experimental.metric.ranking.RankingMetric._ensemble': ( 'api/metric/ranking.html#rankingmetric._ensemble',
-                                                                                                                  'ragas_experimental/metric/ranking.py'),
-                                                   'ragas_experimental.metric.ranking.RankingMetric._get_response_model': ( 'api/metric/ranking.html#rankingmetric._get_response_model',
-                                                                                                                            'ragas_experimental/metric/ranking.py')},
-            'ragas_experimental.metric.result': { 'ragas_experimental.metric.result.MetricResult': ( 'api/metric/result.html#metricresult',
-                                                                                                     'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__add__': ( 'api/metric/result.html#metricresult.__add__',
-                                                                                                             'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__eq__': ( 'api/metric/result.html#metricresult.__eq__',
-                                                                                                            'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__float__': ( 'api/metric/result.html#metricresult.__float__',
-                                                                                                               'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__ge__': ( 'api/metric/result.html#metricresult.__ge__',
-                                                                                                            'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__get_pydantic_core_schema__': ( 'api/metric/result.html#metricresult.__get_pydantic_core_schema__',
-                                                                                                                                  'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__getattr__': ( 'api/metric/result.html#metricresult.__getattr__',
-                                                                                                                 'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__getitem__': ( 'api/metric/result.html#metricresult.__getitem__',
-                                                                                                                 'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__gt__': ( 'api/metric/result.html#metricresult.__gt__',
-                                                                                                            'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__init__': ( 'api/metric/result.html#metricresult.__init__',
-                                                                                                              'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__int__': ( 'api/metric/result.html#metricresult.__int__',
-                                                                                                             'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__iter__': ( 'api/metric/result.html#metricresult.__iter__',
-                                                                                                              'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__json__': ( 'api/metric/result.html#metricresult.__json__',
-                                                                                                              'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__le__': ( 'api/metric/result.html#metricresult.__le__',
-                                                                                                            'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__len__': ( 'api/metric/result.html#metricresult.__len__',
-                                                                                                             'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__lt__': ( 'api/metric/result.html#metricresult.__lt__',
-                                                                                                            'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__mul__': ( 'api/metric/result.html#metricresult.__mul__',
-                                                                                                             'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__radd__': ( 'api/metric/result.html#metricresult.__radd__',
-                                                                                                              'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__repr__': ( 'api/metric/result.html#metricresult.__repr__',
-                                                                                                              'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__rmul__': ( 'api/metric/result.html#metricresult.__rmul__',
-                                                                                                              'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__rsub__': ( 'api/metric/result.html#metricresult.__rsub__',
-                                                                                                              'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__rtruediv__': ( 'api/metric/result.html#metricresult.__rtruediv__',
-                                                                                                                  'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__str__': ( 'api/metric/result.html#metricresult.__str__',
-                                                                                                             'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__sub__': ( 'api/metric/result.html#metricresult.__sub__',
-                                                                                                             'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.__truediv__': ( 'api/metric/result.html#metricresult.__truediv__',
-                                                                                                                 'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.result': ( 'api/metric/result.html#metricresult.result',
-                                                                                                            'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.to_dict': ( 'api/metric/result.html#metricresult.to_dict',
-                                                                                                             'ragas_experimental/metric/result.py'),
-                                                  'ragas_experimental.metric.result.MetricResult.validate': ( 'api/metric/result.html#metricresult.validate',
-                                                                                                              'ragas_experimental/metric/result.py')},
-            'ragas_experimental.model.notion_model': { 'ragas_experimental.model.notion_model.NotionModel': ( 'model/notion_model.html#notionmodel',
-                                                                                                              'ragas_experimental/model/notion_model.py'),
-                                                       'ragas_experimental.model.notion_model.NotionModel.__getattr__': ( 'model/notion_model.html#notionmodel.__getattr__',
-                                                                                                                          'ragas_experimental/model/notion_model.py'),
-                                                       'ragas_experimental.model.notion_model.NotionModel.__init__': ( 'model/notion_model.html#notionmodel.__init__',
-                                                                                                                       'ragas_experimental/model/notion_model.py'),
-                                                       'ragas_experimental.model.notion_model.NotionModel.__repr__': ( 'model/notion_model.html#notionmodel.__repr__',
-                                                                                                                       'ragas_experimental/model/notion_model.py'),
-                                                       'ragas_experimental.model.notion_model.NotionModel.__setattr__': ( 'model/notion_model.html#notionmodel.__setattr__',
-                                                                                                                          'ragas_experimental/model/notion_model.py'),
-                                                       'ragas_experimental.model.notion_model.NotionModel.from_notion': ( 'model/notion_model.html#notionmodel.from_notion',
-                                                                                                                          'ragas_experimental/model/notion_model.py'),
-                                                       'ragas_experimental.model.notion_model.NotionModel.to_notion': ( 'model/notion_model.html#notionmodel.to_notion',
-                                                                                                                        'ragas_experimental/model/notion_model.py'),
-                                                       'ragas_experimental.model.notion_model.NotionModelMeta': ( 'model/notion_model.html#notionmodelmeta',
-                                                                                                                  'ragas_experimental/model/notion_model.py'),
-                                                       'ragas_experimental.model.notion_model.NotionModelMeta.__new__': ( 'model/notion_model.html#notionmodelmeta.__new__',
-                                                                                                                          'ragas_experimental/model/notion_model.py')},
-            'ragas_experimental.model.notion_typing': { 'ragas_experimental.model.notion_typing.Field': ( 'model/notion_types.html#field',
-                                                                                                          'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Field.__get__': ( 'model/notion_types.html#field.__get__',
-                                                                                                                  'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Field.__init__': ( 'model/notion_types.html#field.__init__',
-                                                                                                                   'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Field.__set__': ( 'model/notion_types.html#field.__set__',
-                                                                                                                  'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Field.__set_name__': ( 'model/notion_types.html#field.__set_name__',
-                                                                                                                       'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Field._from_notion': ( 'model/notion_types.html#field._from_notion',
-                                                                                                                       'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Field._to_notion': ( 'model/notion_types.html#field._to_notion',
-                                                                                                                     'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Field._to_notion_property': ( 'model/notion_types.html#field._to_notion_property',
-                                                                                                                              'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Field.validate': ( 'model/notion_types.html#field.validate',
-                                                                                                                   'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.ID': ( 'model/notion_types.html#id',
-                                                                                                       'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.ID.__init__': ( 'model/notion_types.html#id.__init__',
-                                                                                                                'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.ID.__new__': ( 'model/notion_types.html#id.__new__',
-                                                                                                               'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.ID._from_notion': ( 'model/notion_types.html#id._from_notion',
-                                                                                                                    'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.ID._to_notion': ( 'model/notion_types.html#id._to_notion',
-                                                                                                                  'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.ID._to_notion_property': ( 'model/notion_types.html#id._to_notion_property',
-                                                                                                                           'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.ID.validate': ( 'model/notion_types.html#id.validate',
-                                                                                                                'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.MultiSelect': ( 'model/notion_types.html#multiselect',
-                                                                                                                'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.MultiSelect.__init__': ( 'model/notion_types.html#multiselect.__init__',
-                                                                                                                         'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.MultiSelect.__new__': ( 'model/notion_types.html#multiselect.__new__',
-                                                                                                                        'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.MultiSelect._from_notion': ( 'model/notion_types.html#multiselect._from_notion',
-                                                                                                                             'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.MultiSelect._to_notion': ( 'model/notion_types.html#multiselect._to_notion',
-                                                                                                                           'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.MultiSelect._to_notion_property': ( 'model/notion_types.html#multiselect._to_notion_property',
-                                                                                                                                    'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.MultiSelect.validate': ( 'model/notion_types.html#multiselect.validate',
-                                                                                                                         'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.NotionFieldMeta': ( 'model/notion_types.html#notionfieldmeta',
-                                                                                                                    'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.NotionFieldMeta.__init__': ( 'model/notion_types.html#notionfieldmeta.__init__',
-                                                                                                                             'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.NotionFieldMeta.__set_name__': ( 'model/notion_types.html#notionfieldmeta.__set_name__',
-                                                                                                                                 'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.NotionFieldMeta.from_notion': ( 'model/notion_types.html#notionfieldmeta.from_notion',
-                                                                                                                                'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.NotionFieldMeta.to_notion': ( 'model/notion_types.html#notionfieldmeta.to_notion',
-                                                                                                                              'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.NotionFieldMeta.to_notion_property': ( 'model/notion_types.html#notionfieldmeta.to_notion_property',
-                                                                                                                                       'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.NotionFieldMeta.validate': ( 'model/notion_types.html#notionfieldmeta.validate',
-                                                                                                                             'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Select': ( 'model/notion_types.html#select',
-                                                                                                           'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Select.__init__': ( 'model/notion_types.html#select.__init__',
-                                                                                                                    'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Select.__new__': ( 'model/notion_types.html#select.__new__',
-                                                                                                                   'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Select._from_notion': ( 'model/notion_types.html#select._from_notion',
-                                                                                                                        'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Select._to_notion': ( 'model/notion_types.html#select._to_notion',
-                                                                                                                      'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Select._to_notion_property': ( 'model/notion_types.html#select._to_notion_property',
-                                                                                                                               'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Select.validate': ( 'model/notion_types.html#select.validate',
-                                                                                                                    'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Text': ( 'model/notion_types.html#text',
-                                                                                                         'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Text.__init__': ( 'model/notion_types.html#text.__init__',
-                                                                                                                  'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Text.__new__': ( 'model/notion_types.html#text.__new__',
-                                                                                                                 'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Text._from_notion': ( 'model/notion_types.html#text._from_notion',
-                                                                                                                      'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Text._to_notion': ( 'model/notion_types.html#text._to_notion',
-                                                                                                                    'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.TextNew': ( 'model/notion_types.html#textnew',
-                                                                                                            'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.TextNew.__init__': ( 'model/notion_types.html#textnew.__init__',
-                                                                                                                     'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.TextNew.from_notion': ( 'model/notion_types.html#textnew.from_notion',
-                                                                                                                        'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.TextNew.to_notion': ( 'model/notion_types.html#textnew.to_notion',
-                                                                                                                      'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Title': ( 'model/notion_types.html#title',
-                                                                                                          'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Title.__init__': ( 'model/notion_types.html#title.__init__',
-                                                                                                                   'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Title.__new__': ( 'model/notion_types.html#title.__new__',
-                                                                                                                  'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Title._from_notion': ( 'model/notion_types.html#title._from_notion',
-                                                                                                                       'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.Title._to_notion': ( 'model/notion_types.html#title._to_notion',
-                                                                                                                     'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.URL': ( 'model/notion_types.html#url',
-                                                                                                        'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.URL.__init__': ( 'model/notion_types.html#url.__init__',
-                                                                                                                 'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.URL.__new__': ( 'model/notion_types.html#url.__new__',
-                                                                                                                'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.URL._from_notion': ( 'model/notion_types.html#url._from_notion',
-                                                                                                                     'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.URL._to_notion': ( 'model/notion_types.html#url._to_notion',
-                                                                                                                   'ragas_experimental/model/notion_typing.py'),
-                                                        'ragas_experimental.model.notion_typing.URL.validate': ( 'model/notion_types.html#url.validate',
-                                                                                                                 'ragas_experimental/model/notion_typing.py')},
-            'ragas_experimental.model.pydantic_model': { 'ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel': ( 'api/model/pydantic_mode.html#extendedpydanticbasemodel',
-                                                                                                                                'ragas_experimental/model/pydantic_model.py'),
-                                                         'ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel.__init__': ( 'api/model/pydantic_mode.html#extendedpydanticbasemodel.__init__',
-                                                                                                                                         'ragas_experimental/model/pydantic_model.py'),
-                                                         'ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel._initialize_column_mapping': ( 'api/model/pydantic_mode.html#extendedpydanticbasemodel._initialize_column_mapping',
-                                                                                                                                                           'ragas_experimental/model/pydantic_model.py'),
-                                                         'ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel._is_metric_result_field': ( 'api/model/pydantic_mode.html#extendedpydanticbasemodel._is_metric_result_field',
-                                                                                                                                                        'ragas_experimental/model/pydantic_model.py'),
-                                                         'ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel.get_column_id': ( 'api/model/pydantic_mode.html#extendedpydanticbasemodel.get_column_id',
-                                                                                                                                              'ragas_experimental/model/pydantic_model.py'),
-                                                         'ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel.get_db_field_mapping': ( 'api/model/pydantic_mode.html#extendedpydanticbasemodel.get_db_field_mapping',
-                                                                                                                                                     'ragas_experimental/model/pydantic_model.py'),
-                                                         'ragas_experimental.model.pydantic_model.ExtendedPydanticBaseModel.set_column_id': ( 'api/model/pydantic_mode.html#extendedpydanticbasemodel.set_column_id',
-                                                                                                                                              'ragas_experimental/model/pydantic_model.py')},
-            'ragas_experimental.project': { 'ragas_experimental.project.ExperimentProtocol': ( 'project/experiments.html#experimentprotocol',
-                                                                                               'ragas_experimental/project.py'),
-                                            'ragas_experimental.project.ExperimentProtocol.__call__': ( 'project/experiments.html#experimentprotocol.__call__',
-                                                                                                        'ragas_experimental/project.py'),
-                                            'ragas_experimental.project.ExperimentProtocol.run_async': ( 'project/experiments.html#experimentprotocol.run_async',
-                                                                                                         'ragas_experimental/project.py'),
-                                            'ragas_experimental.project.Project.create_experiment': ( 'project/experiments.html#project.create_experiment',
-                                                                                                      'ragas_experimental/project.py'),
-                                            'ragas_experimental.project.Project.experiment': ( 'project/experiments.html#project.experiment',
-                                                                                               'ragas_experimental/project.py'),
-                                            'ragas_experimental.project.Project.get_experiment': ( 'project/experiments.html#project.get_experiment',
-                                                                                                   'ragas_experimental/project.py')},
-            'ragas_experimental.project.comparison': { 'ragas_experimental.project.comparison.Project.compare_experiments': ( 'project/comparison.html#project.compare_experiments',
-                                                                                                                              'ragas_experimental/project/comparison.py'),
-                                                       'ragas_experimental.project.comparison._combine_experiments': ( 'project/comparison.html#_combine_experiments',
-                                                                                                                       'ragas_experimental/project/comparison.py'),
-                                                       'ragas_experimental.project.comparison._get_title_property': ( 'project/comparison.html#_get_title_property',
-                                                                                                                      'ragas_experimental/project/comparison.py'),
-                                                       'ragas_experimental.project.comparison._model_to_dict': ( 'project/comparison.html#_model_to_dict',
-                                                                                                                 'ragas_experimental/project/comparison.py'),
-                                                       'ragas_experimental.project.comparison._validate_experiments': ( 'project/comparison.html#_validate_experiments',
-                                                                                                                        'ragas_experimental/project/comparison.py')},
-            'ragas_experimental.project.core': { 'ragas_experimental.project.core.Project': ( 'api/project/core.html#project',
-                                                                                              'ragas_experimental/project/core.py'),
-                                                 'ragas_experimental.project.core.Project.__init__': ( 'api/project/core.html#project.__init__',
-                                                                                                       'ragas_experimental/project/core.py'),
-                                                 'ragas_experimental.project.core.Project._create_local_project_structure': ( 'api/project/core.html#project._create_local_project_structure',
-                                                                                                                              'ragas_experimental/project/core.py'),
-                                                 'ragas_experimental.project.core.Project.create': ( 'api/project/core.html#project.create',
-                                                                                                     'ragas_experimental/project/core.py'),
-                                                 'ragas_experimental.project.core.Project.delete': ( 'api/project/core.html#project.delete',
-                                                                                                     'ragas_experimental/project/core.py'),
-                                                 'ragas_experimental.project.core.Project.get': ( 'api/project/core.html#project.get',
-                                                                                                  'ragas_experimental/project/core.py'),
-                                                 'ragas_experimental.project.core.Project.get_dataset_path': ( 'api/project/core.html#project.get_dataset_path',
-                                                                                                               'ragas_experimental/project/core.py'),
-                                                 'ragas_experimental.project.core.Project.get_experiment_path': ( 'api/project/core.html#project.get_experiment_path',
-                                                                                                                  'ragas_experimental/project/core.py')},
-            'ragas_experimental.project.datasets': { 'ragas_experimental.project.datasets.Project.create_dataset': ( 'api/project/datasets.html#project.create_dataset',
-                                                                                                                     'ragas_experimental/project/datasets.py'),
-                                                     'ragas_experimental.project.datasets.Project.get_dataset': ( 'api/project/datasets.html#project.get_dataset',
-                                                                                                                  'ragas_experimental/project/datasets.py'),
-                                                     'ragas_experimental.project.datasets.Project.get_dataset_by_id': ( 'api/project/datasets.html#project.get_dataset_by_id',
-                                                                                                                        'ragas_experimental/project/datasets.py'),
-                                                     'ragas_experimental.project.datasets.Project.list_dataset_names': ( 'api/project/datasets.html#project.list_dataset_names',
-                                                                                                                         'ragas_experimental/project/datasets.py'),
-                                                     'ragas_experimental.project.datasets.create_dataset_columns': ( 'api/project/datasets.html#create_dataset_columns',
-                                                                                                                     'ragas_experimental/project/datasets.py'),
-                                                     'ragas_experimental.project.datasets.get_dataset_from_local': ( 'api/project/datasets.html#get_dataset_from_local',
-                                                                                                                     'ragas_experimental/project/datasets.py'),
-                                                     'ragas_experimental.project.datasets.get_dataset_from_ragas_app': ( 'api/project/datasets.html#get_dataset_from_ragas_app',
-                                                                                                                         'ragas_experimental/project/datasets.py')},
-            'ragas_experimental.project.experiments': { 'ragas_experimental.project.experiments.ExperimentProtocol': ( 'api/project/experiments.html#experimentprotocol',
-                                                                                                                       'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.ExperimentProtocol.__call__': ( 'api/project/experiments.html#experimentprotocol.__call__',
-                                                                                                                                'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.ExperimentProtocol.run_async': ( 'api/project/experiments.html#experimentprotocol.run_async',
-                                                                                                                                 'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.compare_and_plot': ( 'api/project/experiments.html#project.compare_and_plot',
-                                                                                                                             'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.create_experiment': ( 'api/project/experiments.html#project.create_experiment',
-                                                                                                                              'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.experiment': ( 'api/project/experiments.html#project.experiment',
-                                                                                                                       'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.get_experiment': ( 'api/project/experiments.html#project.get_experiment',
-                                                                                                                           'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.get_experiment_by_id': ( 'api/project/experiments.html#project.get_experiment_by_id',
-                                                                                                                                 'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.get_experiment_path': ( 'api/project/experiments.html#project.get_experiment_path',
-                                                                                                                                'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.langfuse_experiment': ( 'api/project/experiments.html#project.langfuse_experiment',
-                                                                                                                                'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.list_experiment_names': ( 'api/project/experiments.html#project.list_experiment_names',
-                                                                                                                                  'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.Project.mlflow_experiment': ( 'api/project/experiments.html#project.mlflow_experiment',
-                                                                                                                              'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.cleanup_experiment_branches': ( 'api/project/experiments.html#cleanup_experiment_branches',
-                                                                                                                                'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.create_experiment_columns': ( 'api/project/experiments.html#create_experiment_columns',
-                                                                                                                              'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.find_git_root': ( 'api/project/experiments.html#find_git_root',
-                                                                                                                  'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.get_experiment_from_local': ( 'api/project/experiments.html#get_experiment_from_local',
-                                                                                                                              'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.get_experiment_from_ragas_app': ( 'api/project/experiments.html#get_experiment_from_ragas_app',
-                                                                                                                                  'ragas_experimental/project/experiments.py'),
-                                                        'ragas_experimental.project.experiments.version_experiment': ( 'api/project/experiments.html#version_experiment',
-                                                                                                                       'ragas_experimental/project/experiments.py')},
-            'ragas_experimental.project.naming': { 'ragas_experimental.project.naming.MemorableNames': ( 'api/project/naming.html#memorablenames',
-                                                                                                         'ragas_experimental/project/naming.py'),
-                                                   'ragas_experimental.project.naming.MemorableNames.__init__': ( 'api/project/naming.html#memorablenames.__init__',
-                                                                                                                  'ragas_experimental/project/naming.py'),
-                                                   'ragas_experimental.project.naming.MemorableNames.generate_name': ( 'api/project/naming.html#memorablenames.generate_name',
-                                                                                                                       'ragas_experimental/project/naming.py'),
-                                                   'ragas_experimental.project.naming.MemorableNames.generate_unique_name': ( 'api/project/naming.html#memorablenames.generate_unique_name',
-                                                                                                                              'ragas_experimental/project/naming.py'),
-                                                   'ragas_experimental.project.naming.MemorableNames.generate_unique_names': ( 'api/project/naming.html#memorablenames.generate_unique_names',
-                                                                                                                               'ragas_experimental/project/naming.py')},
-            'ragas_experimental.prompt.base': { 'ragas_experimental.prompt.base.Prompt': ( 'api/prompt/base.html#prompt',
-                                                                                           'ragas_experimental/prompt/base.py'),
-                                                'ragas_experimental.prompt.base.Prompt.__init__': ( 'api/prompt/base.html#prompt.__init__',
-                                                                                                    'ragas_experimental/prompt/base.py'),
-                                                'ragas_experimental.prompt.base.Prompt.__str__': ( 'api/prompt/base.html#prompt.__str__',
-                                                                                                   'ragas_experimental/prompt/base.py'),
-                                                'ragas_experimental.prompt.base.Prompt._format_examples': ( 'api/prompt/base.html#prompt._format_examples',
-                                                                                                            'ragas_experimental/prompt/base.py'),
-                                                'ragas_experimental.prompt.base.Prompt._validate_instruction': ( 'api/prompt/base.html#prompt._validate_instruction',
-                                                                                                                 'ragas_experimental/prompt/base.py'),
-                                                'ragas_experimental.prompt.base.Prompt.add_example': ( 'api/prompt/base.html#prompt.add_example',
-                                                                                                       'ragas_experimental/prompt/base.py'),
-                                                'ragas_experimental.prompt.base.Prompt.format': ( 'api/prompt/base.html#prompt.format',
-                                                                                                  'ragas_experimental/prompt/base.py')},
-            'ragas_experimental.prompt.dynamic_few_shot': { 'ragas_experimental.prompt.dynamic_few_shot.DynamicFewShotPrompt': ( 'api/prompt/dynamic_few_shot.html#dynamicfewshotprompt',
-                                                                                                                                 'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.DynamicFewShotPrompt.__init__': ( 'api/prompt/dynamic_few_shot.html#dynamicfewshotprompt.__init__',
-                                                                                                                                          'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.DynamicFewShotPrompt.add_example': ( 'api/prompt/dynamic_few_shot.html#dynamicfewshotprompt.add_example',
-                                                                                                                                             'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.DynamicFewShotPrompt.format': ( 'api/prompt/dynamic_few_shot.html#dynamicfewshotprompt.format',
-                                                                                                                                        'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.DynamicFewShotPrompt.from_prompt': ( 'api/prompt/dynamic_few_shot.html#dynamicfewshotprompt.from_prompt',
-                                                                                                                                             'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.ExampleStore': ( 'api/prompt/dynamic_few_shot.html#examplestore',
-                                                                                                                         'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.ExampleStore.add_example': ( 'api/prompt/dynamic_few_shot.html#examplestore.add_example',
-                                                                                                                                     'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.ExampleStore.get_examples': ( 'api/prompt/dynamic_few_shot.html#examplestore.get_examples',
-                                                                                                                                      'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.InMemoryExampleStore': ( 'api/prompt/dynamic_few_shot.html#inmemoryexamplestore',
-                                                                                                                                 'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.InMemoryExampleStore.__init__': ( 'api/prompt/dynamic_few_shot.html#inmemoryexamplestore.__init__',
-                                                                                                                                          'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.InMemoryExampleStore.__len__': ( 'api/prompt/dynamic_few_shot.html#inmemoryexamplestore.__len__',
-                                                                                                                                         'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.InMemoryExampleStore._get_embedding': ( 'api/prompt/dynamic_few_shot.html#inmemoryexamplestore._get_embedding',
-                                                                                                                                                'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.InMemoryExampleStore._get_nearest_examples': ( 'api/prompt/dynamic_few_shot.html#inmemoryexamplestore._get_nearest_examples',
-                                                                                                                                                       'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.InMemoryExampleStore.add_example': ( 'api/prompt/dynamic_few_shot.html#inmemoryexamplestore.add_example',
-                                                                                                                                             'ragas_experimental/prompt/dynamic_few_shot.py'),
-                                                            'ragas_experimental.prompt.dynamic_few_shot.InMemoryExampleStore.get_examples': ( 'api/prompt/dynamic_few_shot.html#inmemoryexamplestore.get_examples',
-                                                                                                                                              'ragas_experimental/prompt/dynamic_few_shot.py')},
-            'ragas_experimental.tracing.langfuse': { 'ragas_experimental.tracing.langfuse.LangfuseTrace': ( 'api/tracing/langfuse.html#langfusetrace',
-                                                                                                            'ragas_experimental/tracing/langfuse.py'),
-                                                     'ragas_experimental.tracing.langfuse.LangfuseTrace.__init__': ( 'api/tracing/langfuse.html#langfusetrace.__init__',
-                                                                                                                     'ragas_experimental/tracing/langfuse.py'),
-                                                     'ragas_experimental.tracing.langfuse.LangfuseTrace.filter': ( 'api/tracing/langfuse.html#langfusetrace.filter',
-                                                                                                                   'ragas_experimental/tracing/langfuse.py'),
-                                                     'ragas_experimental.tracing.langfuse.LangfuseTrace.get_url': ( 'api/tracing/langfuse.html#langfusetrace.get_url',
-                                                                                                                    'ragas_experimental/tracing/langfuse.py'),
-                                                     'ragas_experimental.tracing.langfuse.add_query_param': ( 'api/tracing/langfuse.html#add_query_param',
-                                                                                                              'ragas_experimental/tracing/langfuse.py'),
-                                                     'ragas_experimental.tracing.langfuse.sync_trace': ( 'api/tracing/langfuse.html#sync_trace',
-                                                                                                         'ragas_experimental/tracing/langfuse.py')},
-            'ragas_experimental.tracing.mlflow': { 'ragas_experimental.tracing.mlflow.MLflowTrace': ( 'api/tracing/mlflow.html#mlflowtrace',
-                                                                                                      'ragas_experimental/tracing/mlflow.py'),
-                                                   'ragas_experimental.tracing.mlflow.MLflowTrace.__init__': ( 'api/tracing/mlflow.html#mlflowtrace.__init__',
-                                                                                                               'ragas_experimental/tracing/mlflow.py'),
-                                                   'ragas_experimental.tracing.mlflow.MLflowTrace.get_filter': ( 'api/tracing/mlflow.html#mlflowtrace.get_filter',
-                                                                                                                 'ragas_experimental/tracing/mlflow.py'),
-                                                   'ragas_experimental.tracing.mlflow.MLflowTrace.get_url': ( 'api/tracing/mlflow.html#mlflowtrace.get_url',
-                                                                                                              'ragas_experimental/tracing/mlflow.py'),
-                                                   'ragas_experimental.tracing.mlflow.sync_trace': ( 'api/tracing/mlflow.html#sync_trace',
-                                                                                                     'ragas_experimental/tracing/mlflow.py')},
-            'ragas_experimental.typing': { 'ragas_experimental.typing.Checkbox': ( 'api/typing.html#checkbox',
-                                                                                   'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Checkbox.__init__': ( 'api/typing.html#checkbox.__init__',
-                                                                                            'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.ColumnType': ( 'api/typing.html#columntype',
-                                                                                     'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Custom': ('api/typing.html#custom', 'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Custom.__init__': ( 'api/typing.html#custom.__init__',
-                                                                                          'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Date': ('api/typing.html#date', 'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Date.__init__': ( 'api/typing.html#date.__init__',
-                                                                                        'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.FieldMeta': ( 'api/typing.html#fieldmeta',
-                                                                                    'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.FieldMeta.__init__': ( 'api/typing.html#fieldmeta.__init__',
-                                                                                             'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.ModelConverter': ( 'api/typing.html#modelconverter',
-                                                                                         'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.ModelConverter.instance_to_row': ( 'api/typing.html#modelconverter.instance_to_row',
-                                                                                                         'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.ModelConverter.instances_to_rows': ( 'api/typing.html#modelconverter.instances_to_rows',
-                                                                                                           'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.ModelConverter.model_to_columns': ( 'api/typing.html#modelconverter.model_to_columns',
-                                                                                                          'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.MultiSelect': ( 'api/typing.html#multiselect',
-                                                                                      'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.MultiSelect.__init__': ( 'api/typing.html#multiselect.__init__',
-                                                                                               'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Number': ('api/typing.html#number', 'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Number.__init__': ( 'api/typing.html#number.__init__',
-                                                                                          'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Select': ('api/typing.html#select', 'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Select.__init__': ( 'api/typing.html#select.__init__',
-                                                                                          'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Text': ('api/typing.html#text', 'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Text.__init__': ( 'api/typing.html#text.__init__',
-                                                                                        'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Url': ('api/typing.html#url', 'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.Url.__init__': ( 'api/typing.html#url.__init__',
-                                                                                       'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.get_colors_for_options': ( 'api/typing.html#get_colors_for_options',
-                                                                                                 'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.infer_field_type': ( 'api/typing.html#infer_field_type',
-                                                                                           'ragas_experimental/typing.py'),
-                                           'ragas_experimental.typing.infer_metric_result_type': ( 'api/typing.html#infer_metric_result_type',
-                                                                                                   'ragas_experimental/typing.py')},
-            'ragas_experimental.utils': { 'ragas_experimental.utils.async_to_sync': ( 'api/utils.html#async_to_sync',
-                                                                                      'ragas_experimental/utils.py'),
-                                          'ragas_experimental.utils.create_nano_id': ( 'api/utils.html#create_nano_id',
-                                                                                       'ragas_experimental/utils.py'),
-                                          'ragas_experimental.utils.get_test_directory': ( 'api/utils.html#get_test_directory',
-                                                                                           'ragas_experimental/utils.py'),
-                                          'ragas_experimental.utils.plot_experiments_as_subplots': ( 'api/utils.html#plot_experiments_as_subplots',
-                                                                                                     'ragas_experimental/utils.py')}}}
diff --git a/experimental/ragas_experimental/backends/factory.py b/experimental/ragas_experimental/backends/factory.py
index 39cd0ceac..3d48e1600 100644
--- a/experimental/ragas_experimental/backends/factory.py
+++ b/experimental/ragas_experimental/backends/factory.py
@@ -1,17 +1,13 @@
 """Factory class for creating the backends or mocked backends."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/backends/factory.ipynb.
+__all__ = ["RagasApiClientFactory"]
 
-# %% auto 0
-__all__ = ['RagasApiClientFactory']
-
-# %% ../../nbs/api/backends/factory.ipynb 2
-import typing as t
 import os
+import typing as t
 
 from .ragas_api_client import RagasApiClient
 
-# %% ../../nbs/api/backends/factory.ipynb 3
+
 class RagasApiClientFactory:
     """Factory for creating Ragas API client instances."""
 
diff --git a/experimental/ragas_experimental/backends/mock_notion.py b/experimental/ragas_experimental/backends/mock_notion.py
index a84e8e807..e9d4dba87 100644
--- a/experimental/ragas_experimental/backends/mock_notion.py
+++ b/experimental/ragas_experimental/backends/mock_notion.py
@@ -1,19 +1,20 @@
 """Helps with testing `ragas_annotator` better."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/backends/mock_notion_client.ipynb.
+__all__ = [
+    "MockPagesAPI",
+    "MockDatabasesAPI",
+    "MockBlocksAPI",
+    "MockBlockChildrenAPI",
+    "MockNotionClient",
+]
 
-# %% auto 0
-__all__ = ['MockPagesAPI', 'MockDatabasesAPI', 'MockBlocksAPI', 'MockBlockChildrenAPI', 'MockNotionClient']
-
-# %% ../../nbs/backends/mock_notion_client.ipynb 2
-import typing as t
 import uuid
 from copy import deepcopy
 from datetime import datetime
 
 from ..exceptions import NotFoundError
 
-# %% ../../nbs/backends/mock_notion_client.ipynb 3
+
 class MockPagesAPI:
     """Mock implementation of notion_client.Client.pages"""
 
@@ -96,7 +97,7 @@ def _extract_title(self, properties):
                         return text_obj["text"]["content"]
         return "Untitled"
 
-# %% ../../nbs/backends/mock_notion_client.ipynb 4
+
 class MockDatabasesAPI:
     """Mock implementation of notion_client.Client.databases"""
 
@@ -183,7 +184,7 @@ def _extract_title(self, title):
                 return text_obj["text"]["content"]
         return "Untitled"
 
-# %% ../../nbs/backends/mock_notion_client.ipynb 5
+
 class MockBlocksAPI:
     """Mock implementation of notion_client.Client.blocks"""
 
@@ -213,7 +214,7 @@ def list(self, block_id, start_cursor=None, page_size=100):
 
         return {"results": deepcopy(children), "has_more": False, "next_cursor": None}
 
-# %% ../../nbs/backends/mock_notion_client.ipynb 6
+
 class MockNotionClient:
     """Mock implementation of notion_client.Client for testing."""
 
diff --git a/experimental/ragas_experimental/backends/notion_backend.py b/experimental/ragas_experimental/backends/notion_backend.py
index f37ab0224..38c53b722 100644
--- a/experimental/ragas_experimental/backends/notion_backend.py
+++ b/experimental/ragas_experimental/backends/notion_backend.py
@@ -1,22 +1,16 @@
 """`Project` uses this backend to interact with the Notion API."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/backends/notion.ipynb.
+__all__ = ["NotionBackend", "get_page_id", "get_database_id"]
 
-# %% auto 0
-__all__ = ['NotionBackend', 'get_page_id', 'get_database_id']
-
-# %% ../../nbs/backends/notion.ipynb 3
-import typing as t
 import os
-from datetime import datetime
-import uuid
+import typing as t
 
+from fastcore.utils import patch, patch_to
 from notion_client import Client as NotionClient
-from fastcore.utils import patch_to, patch
 
 from ..exceptions import DuplicateError, NotFoundError
 
-# %% ../../nbs/backends/notion.ipynb 4
+
 class NotionBackend:
     """A backend for interacting with the Notion API"""
 
@@ -90,7 +84,7 @@ def page_exists(self, page_id):
         try:
             self.client.pages.retrieve(page_id)
             return True
-        except:
+        except Exception:
             return False
 
     def create_new_database(
@@ -113,7 +107,7 @@ def create_new_database(
         )
         return response["id"]
 
-# %% ../../nbs/backends/notion.ipynb 16
+
 @t.overload
 def get_page_id(
     self, parent_id: str, page_name: str, return_multiple: t.Literal[False] = False
@@ -171,19 +165,15 @@ def get_page_id(
             raise DuplicateError(f"Multiple pages found with name '{page_name}'")
         return matching_pages[0]
 
-# %% ../../nbs/backends/notion.ipynb 17
+
 @t.overload
 def get_database_id(
     self, parent_page_id: str, name: str, return_multiple: t.Literal[False] = False
 ) -> str: ...
-
-
 @t.overload
 def get_database_id(
     self, parent_page_id: str, name: str, return_multiple: t.Literal[True]
 ) -> t.List[str]: ...
-
-
 @patch_to(NotionBackend)
 def get_database_id(
     self, parent_page_id: str, name: str, return_multiple: bool = False
@@ -231,7 +221,7 @@ def get_database_id(
             raise DuplicateError(f"Multiple databases found with name '{name}'")
         return matching_databases[0]
 
-# %% ../../nbs/backends/notion.ipynb 18
+
 @patch
 def create_page_in_database(
     self: NotionBackend,
@@ -263,7 +253,7 @@ def create_page_in_database(
 
     return response
 
-# %% ../../nbs/backends/notion.ipynb 19
+
 @patch
 def get_database(self: NotionBackend, database_id: str) -> dict:
     """Get a database by ID.
@@ -276,7 +266,7 @@ def get_database(self: NotionBackend, database_id: str) -> dict:
     """
     return self.client.databases.retrieve(database_id=database_id)
 
-# %% ../../nbs/backends/notion.ipynb 20
+
 @patch
 def query_database(
     self: NotionBackend,
@@ -332,7 +322,7 @@ def query_database(
     # Return combined results
     return {"results": all_results, "has_more": False, "next_cursor": None}
 
-# %% ../../nbs/backends/notion.ipynb 21
+
 @patch
 def update_page(
     self: NotionBackend,
diff --git a/experimental/ragas_experimental/backends/ragas_api_client.py b/experimental/ragas_experimental/backends/ragas_api_client.py
index cf2563fae..0db826397 100644
--- a/experimental/ragas_experimental/backends/ragas_api_client.py
+++ b/experimental/ragas_experimental/backends/ragas_api_client.py
@@ -1,28 +1,33 @@
 """Python client to api.ragas.io"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/backends/ragas_api_client.ipynb.
+__all__ = [
+    "DEFAULT_SETTINGS",
+    "RagasApiClient",
+    "create_nano_id",
+    "Column",
+    "RowCell",
+    "Row",
+]
 
-# %% auto 0
-__all__ = ['DEFAULT_SETTINGS', 'RagasApiClient', 'create_nano_id', 'Column', 'RowCell', 'Row']
-
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 3
-import httpx
 import asyncio
+import string
 import typing as t
-from pydantic import BaseModel, Field
+import uuid
+
+import httpx
 from fastcore.utils import patch
+from pydantic import BaseModel, Field
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 4
 from ragas_experimental.exceptions import (
     DatasetNotFoundError,
     DuplicateDatasetError,
-    ProjectNotFoundError,
+    DuplicateExperimentError,
     DuplicateProjectError,
     ExperimentNotFoundError,
-    DuplicateExperimentError,
+    ProjectNotFoundError,
 )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 5
+
 class RagasApiClient:
     """Client for the Ragas Relay API."""
 
@@ -94,7 +99,7 @@ async def _delete_resource(self, path):
         """Generic resource deletion."""
         return await self._request("DELETE", path)
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 6
+
 @patch
 async def _get_resource_by_name(
     self: RagasApiClient,
@@ -180,7 +185,7 @@ async def _get_resource_by_name(
     else:
         return await get_method(matching_resources[0].get("id"))
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 8
+
 # ---- Projects ----
 @patch
 async def list_projects(
@@ -245,7 +250,7 @@ async def delete_project(self: RagasApiClient, project_id: str) -> None:
     """Delete a project."""
     await self._delete_resource(f"projects/{project_id}")
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 13
+
 @patch
 async def get_project_by_name(self: RagasApiClient, project_name: str) -> t.Dict:
     """Get a project by its name.
@@ -270,7 +275,7 @@ async def get_project_by_name(self: RagasApiClient, project_name: str) -> t.Dict
         resource_type_name="project",
     )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 16
+
 # ---- Datasets ----
 @patch
 async def list_datasets(
@@ -336,7 +341,7 @@ async def delete_dataset(
     """Delete a dataset."""
     await self._delete_resource(f"projects/{project_id}/datasets/{dataset_id}")
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 23
+
 @patch
 async def get_dataset_by_name(
     self: RagasApiClient, project_id: str, dataset_name: str
@@ -365,7 +370,7 @@ async def get_dataset_by_name(
         project_id=project_id,
     )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 26
+
 # ---- Experiments ----
 @patch
 async def list_experiments(
@@ -435,7 +440,7 @@ async def delete_experiment(
     """Delete an experiment."""
     await self._delete_resource(f"projects/{project_id}/experiments/{experiment_id}")
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 29
+
 @patch
 async def get_experiment_by_name(
     self: RagasApiClient, project_id: str, experiment_name: str
@@ -464,10 +469,7 @@ async def get_experiment_by_name(
         project_id=project_id,
     )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 33
-from ..typing import ColumnType
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 34
 # ---- Dataset Columns ----
 @patch
 async def list_dataset_columns(
@@ -546,7 +548,7 @@ async def delete_dataset_column(
         f"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}"
     )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 42
+
 # ---- Dataset Rows ----
 @patch
 async def list_dataset_rows(
@@ -611,11 +613,7 @@ async def delete_dataset_row(
         f"projects/{project_id}/datasets/{dataset_id}/rows/{row_id}"
     )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 55
-import uuid
-import string
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 56
 def create_nano_id(size=12):
     # Define characters to use (alphanumeric)
     alphabet = string.ascii_letters + string.digits
@@ -632,28 +630,7 @@ def create_nano_id(size=12):
     # Pad if necessary and return desired length
     return result[:size]
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 58
-import uuid
-import string
-
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 59
-def create_nano_id(size=12):
-    # Define characters to use (alphanumeric)
-    alphabet = string.ascii_letters + string.digits
-
-    # Generate UUID and convert to int
-    uuid_int = uuid.uuid4().int
-
-    # Convert to base62
-    result = ""
-    while uuid_int:
-        uuid_int, remainder = divmod(uuid_int, len(alphabet))
-        result = alphabet[remainder] + result
 
-    # Pad if necessary and return desired length
-    return result[:size]
-
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 61
 # Default settings for columns
 DEFAULT_SETTINGS = {"is_required": False, "max_length": 1000}
 
@@ -676,7 +653,7 @@ class Row(BaseModel):
     id: str = Field(default_factory=create_nano_id)
     data: t.List[RowCell] = Field(...)
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 62
+
 # ---- Resource With Data Helper Methods ----
 @patch
 async def _create_with_data(
@@ -769,7 +746,7 @@ async def _create_with_data(
         if "resource" in locals():
             try:
                 await delete_fn(project_id, resource["id"])
-            except:
+            except Exception:
                 pass  # Ignore cleanup errors
         raise e
 
@@ -804,7 +781,7 @@ async def create_dataset_with_data(
         "dataset", project_id, name, description, columns, rows, batch_size
     )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 68
+
 # ---- Experiment Columns ----
 @patch
 async def list_experiment_columns(
@@ -948,7 +925,7 @@ async def delete_experiment_row(
         f"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}"
     )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 71
+
 @patch
 async def create_experiment_with_data(
     self: RagasApiClient,
@@ -979,7 +956,7 @@ async def create_experiment_with_data(
         "experiment", project_id, name, description, columns, rows, batch_size
     )
 
-# %% ../../nbs/api/backends/ragas_api_client.ipynb 72
+
 # ---- Utility Methods ----
 @patch
 def create_column(
diff --git a/experimental/ragas_experimental/core.py b/experimental/ragas_experimental/core.py
deleted file mode 100644
index 08435194c..000000000
--- a/experimental/ragas_experimental/core.py
+++ /dev/null
@@ -1,9 +0,0 @@
-"""Fill in a module description here"""
-
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/00_core.ipynb.
-
-# %% auto 0
-__all__ = ['foo']
-
-# %% ../nbs/00_core.ipynb 3
-def foo(): pass
diff --git a/experimental/ragas_experimental/dataset.py b/experimental/ragas_experimental/dataset.py
index 527ded343..e36cd21cc 100644
--- a/experimental/ragas_experimental/dataset.py
+++ b/experimental/ragas_experimental/dataset.py
@@ -1,494 +1,31 @@
 """A python list like object that contains your evaluation data."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/api/dataset.ipynb.
+__all__ = [
+    "BaseModelType",
+    "Dataset",
+]
 
-# %% auto 0
-__all__ = ['BaseModelType', 'DatasetBackend', 'RagasAppBackend', 'LocalBackend', 'create_dataset_backend', 'Dataset']
-
-# %% ../nbs/api/dataset.ipynb 2
-from abc import ABC, abstractmethod
-import os
 import typing as t
-import csv
-import uuid
 
-from fastcore.utils import patch
-import pandas as pd
+try:
+    import pandas as pd
+except ImportError:
+    pd = None
 
 from ragas_experimental.model.pydantic_model import (
     ExtendedPydanticBaseModel as BaseModel,
 )
-from .utils import create_nano_id, async_to_sync, get_test_directory
+
 from .backends.ragas_api_client import RagasApiClient
+from .project.backends import (
+    LocalCSVProjectBackend,
+    PlatformProjectBackend,
+)
 from .typing import SUPPORTED_BACKENDS
-import ragas_experimental.typing as rt
-from .metric import MetricResult
 
-# %% ../nbs/api/dataset.ipynb 3
 BaseModelType = t.TypeVar("BaseModelType", bound=BaseModel)
 
 
-class DatasetBackend(ABC):
-    """Abstract base class for dataset backends.
-
-    All dataset storage backends must implement these methods.
-    """
-
-    @abstractmethod
-    def initialize(self, dataset):
-        """Initialize the backend with dataset information"""
-        pass
-
-    @abstractmethod
-    def get_column_mapping(self, model):
-        """Get mapping between model fields and backend columns"""
-        pass
-
-    @abstractmethod
-    def load_entries(self, model_class):
-        """Load all entries from storage"""
-        pass
-
-    @abstractmethod
-    def append_entry(self, entry):
-        """Add a new entry to storage and return its ID"""
-        pass
-
-    @abstractmethod
-    def update_entry(self, entry):
-        """Update an existing entry in storage"""
-        pass
-
-    @abstractmethod
-    def delete_entry(self, entry_id):
-        """Delete an entry from storage"""
-        pass
-
-    @abstractmethod
-    def get_entry_by_field(self, field_name: str, field_value: t.Any, model_class):
-        """Get an entry by field value"""
-        pass
-
-# %% ../nbs/api/dataset.ipynb 4
-class RagasAppBackend(DatasetBackend):
-    """Backend for storing datasets using the Ragas API."""
-
-    def __init__(self, ragas_api_client, project_id, dataset_id):
-        """Initialize the RagasAppBackend.
-
-        Args:
-            ragas_api_client: The RagasApiClient instance
-            project_id: The ID of the project
-            dataset_id: The ID of the dataset
-        """
-        self.ragas_api_client = ragas_api_client
-        self.project_id = project_id
-        self.dataset_id = dataset_id
-        self.dataset = None
-
-    def __str__(self):
-        return f"RagasAppBackend(project_id={self.project_id}, dataset_id={self.dataset_id})"
-
-    def __repr__(self):
-        return self.__str__()
-
-    def initialize(self, dataset):
-        """Initialize the backend with the dataset instance."""
-        self.dataset = dataset
-
-    def get_column_mapping(self, model):
-        """Get mapping between model fields and backend columns."""
-        sync_func = async_to_sync(self.ragas_api_client.list_dataset_columns)
-        columns = sync_func(project_id=self.project_id, dataset_id=self.dataset_id)
-        column_id_map = {column["name"]: column["id"] for column in columns["items"]}
-
-        # Update the model's column mapping with the values from the API
-        column_mapping = {}
-        for field_name in model.__annotations__:
-            if field_name in column_id_map:
-                column_mapping[field_name] = column_id_map[field_name]
-
-        return column_mapping
-
-    def load_entries(self, model_class):
-        """Load all entries from the API."""
-        # Get all rows
-        sync_func = async_to_sync(self.ragas_api_client.list_dataset_rows)
-        response = sync_func(project_id=self.project_id, dataset_id=self.dataset_id)
-
-        # Get column mapping (ID -> name)
-        column_map = {v: k for k, v in model_class.__column_mapping__.items()}
-
-        # Process rows
-        entries = []
-        for row in response.get("items", []):
-            model_data = {}
-            row_id = row.get("id")
-
-            # Convert from API data format to model fields
-            for col_id, value in row.get("data", {}).items():
-                if col_id in column_map:
-                    field_name = column_map[col_id]
-                    model_data[field_name] = value
-
-            # Create model instance
-            entry = model_class(**model_data)
-
-            # Store row ID for future operations
-            entry._row_id = row_id
-
-            entries.append(entry)
-
-        return entries
-
-    def append_entry(self, entry):
-        """Add a new entry to the API and return its ID."""
-        import ragas_experimental.typing as rt
-
-        # Get column mapping
-        column_id_map = entry.__class__.__column_mapping__
-
-        # Create row data
-        row_dict_converted = rt.ModelConverter.instance_to_row(entry)
-        row_id = create_nano_id()
-        row_data = {}
-
-        for column in row_dict_converted["data"]:
-            if column["column_id"] in column_id_map:
-                row_data[column_id_map[column["column_id"]]] = column["data"]
-
-        # Create row in API
-        sync_func = async_to_sync(self.ragas_api_client.create_dataset_row)
-        response = sync_func(
-            project_id=self.project_id,
-            dataset_id=self.dataset_id,
-            id=row_id,
-            data=row_data,
-        )
-
-        # Return the row ID
-        return response["id"]
-
-    def update_entry(self, entry):
-        """Update an existing entry in the API."""
-        import ragas_experimental.typing as rt
-
-        # Get the row ID
-        row_id = None
-        if hasattr(entry, "_row_id") and entry._row_id:
-            row_id = entry._row_id
-        else:
-            raise ValueError("Cannot update: entry has no row ID")
-
-        # Get column mapping and prepare data
-        column_id_map = entry.__class__.__column_mapping__
-        row_dict = rt.ModelConverter.instance_to_row(entry)["data"]
-        row_data = {}
-
-        for column in row_dict:
-            if column["column_id"] in column_id_map:
-                row_data[column_id_map[column["column_id"]]] = column["data"]
-
-        # Update in API
-        sync_func = async_to_sync(self.ragas_api_client.update_dataset_row)
-        response = sync_func(
-            project_id=self.project_id,
-            dataset_id=self.dataset_id,
-            row_id=row_id,
-            data=row_data,
-        )
-
-        return response
-
-    def delete_entry(self, entry_id):
-        """Delete an entry from the API."""
-        # Delete the row
-        sync_func = async_to_sync(self.ragas_api_client.delete_dataset_row)
-        response = sync_func(
-            project_id=self.project_id, dataset_id=self.dataset_id, row_id=entry_id
-        )
-
-        return response
-
-    def get_entry_by_field(self, field_name, field_value, model_class):
-        """Get an entry by field value."""
-        # We don't have direct filtering in the API, so load all and filter
-        entries = self.load_entries(model_class)
-
-        # Search for matching entry
-        for entry in entries:
-            if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:
-                return entry
-
-        return None
-
-# %% ../nbs/api/dataset.ipynb 5
-class LocalBackend(DatasetBackend):
-    def __init__(
-        self,
-        local_root_dir,
-        project_id,
-        dataset_id,
-        dataset_name,
-        type: t.Literal["datasets", "experiments"],
-    ):
-        """Initialize the LocalBackend.
-
-        Args:
-            local_root_dir: The root directory for all projects
-            project_id: The ID of the project
-            dataset_id: The ID of the dataset
-            dataset_name: The name of the dataset
-        """
-        self.local_root_dir = local_root_dir
-        self.project_id = project_id
-        self.dataset_id = dataset_id
-        self.dataset_name = dataset_name
-        self.dataset = None
-        self.type = type
-
-    def __str__(self):
-        return f"LocalBackend(local_root_dir={self.local_root_dir}, project_id={self.project_id}, dataset_id={self.dataset_id}, dataset_name={self.dataset_name})"
-
-    def __repr__(self):
-        return self.__str__()
-
-    def initialize(self, dataset):
-        """Initialize the backend with the dataset instance."""
-        self.dataset = dataset
-
-        # Ensure CSV file exists
-        self._ensure_csv_exists()
-
-    def _ensure_csv_exists(self):
-        """Create the CSV file if it doesn't exist."""
-        csv_path = self._get_csv_path()
-
-        # Create directories if needed
-        os.makedirs(os.path.dirname(csv_path), exist_ok=True)
-
-        # Create file with headers if it doesn't exist
-        if not os.path.exists(csv_path):
-            # Include _row_id in the headers
-            field_names = ["_row_id"] + list(self.dataset.model.__annotations__.keys())
-
-            with open(csv_path, "w", newline="") as f:
-                writer = csv.writer(f)
-                writer.writerow(field_names)
-
-    def _get_csv_path(self):
-        """Get the path to the CSV file."""
-        return os.path.join(
-            self.local_root_dir, self.project_id, self.type, f"{self.dataset_name}.csv"
-        )
-
-    def get_column_mapping(self, model) -> t.Dict:
-        """Get mapping between model fields and CSV columns.
-
-        For CSV, column names directly match field names.
-        """
-        # Simple dictionary comprehension
-        return model.model_fields
-
-    def load_entries(self, model_class):
-        """Load all entries from the CSV file."""
-        csv_path = self._get_csv_path()
-
-        if not os.path.exists(csv_path):
-            return []
-
-        entries = []
-
-        with open(csv_path, "r", newline="") as f:
-            reader = csv.DictReader(f)
-
-            for row in reader:
-                try:
-                    # Extract row_id and remove from model data
-                    row_id = row.get("_row_id", str(uuid.uuid4()))
-
-                    # Create a copy without _row_id for model instantiation
-                    model_data = {k: v for k, v in row.items() if k != "_row_id"}
-
-                    # Convert types as needed
-                    typed_row = {}
-                    for field, value in model_data.items():
-                        if field in model_class.model_fields:
-                            field_type = model_class.model_fields[field].annotation
-
-                            # Handle basic type conversions
-                            if field_type == int:
-                                typed_row[field] = int(value) if value else 0
-                            elif field_type == float:
-                                typed_row[field] = float(value) if value else 0.0
-                            elif field_type == bool:
-                                typed_row[field] = value.lower() in (
-                                    "true",
-                                    "t",
-                                    "yes",
-                                    "y",
-                                    "1",
-                                )
-                            else:
-                                typed_row[field] = value
-
-                    # Create model instance
-                    entry = model_class(**typed_row)
-
-                    # Set the row ID from CSV (or use UUID if not available)
-                    entry._row_id = row_id
-
-                    entries.append(entry)
-                except Exception as e:
-                    print(f"Error loading row from CSV: {e}")
-
-        return entries
-
-    def append_entry(self, entry):
-        """Add a new entry to the CSV file and return a generated ID."""
-        csv_path = self._get_csv_path()
-
-        # Read existing rows to avoid overwriting
-        existing_rows = []
-        if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
-            with open(csv_path, "r", newline="") as f:
-                reader = csv.DictReader(f)
-                existing_rows = list(reader)
-
-        # Generate a row ID if needed
-        row_id = getattr(entry, "_row_id", None) or str(uuid.uuid4())
-
-        # Get field names including row_id
-        field_names = ["_row_id"] + list(entry.model_fields.keys())
-
-        # Convert entry to dict
-        entry_dict = entry.model_dump()
-
-        # Add row_id to the dict
-        entry_dict["_row_id"] = row_id
-
-        # Write all rows back with the new entry
-        with open(csv_path, "w", newline="") as f:
-            writer = csv.DictWriter(f, fieldnames=field_names)
-            writer.writeheader()
-
-            # Write existing rows
-            for row in existing_rows:
-                writer.writerow(row)
-
-            # Write new row
-            writer.writerow(entry_dict)
-
-        # Return the row ID
-        return row_id
-
-    def update_entry(self, entry):
-        """Update an existing entry in the CSV file.
-
-        Since CSV files don't support in-place updates, we need to
-        rewrite the entire file.
-        """
-        # Create a copy of entries to modify
-        entries_to_save = list(self.dataset._entries)  # Make a copy
-
-        # Find the entry to update
-        updated = False
-        for i, e in enumerate(entries_to_save):
-            if (
-                hasattr(e, "_row_id")
-                and hasattr(entry, "_row_id")
-                and e._row_id == entry._row_id
-            ):
-                # Update the entry in our copy
-                entries_to_save[i] = entry
-                updated = True
-                break
-
-        # If entry wasn't found, just append it
-        if not updated and entries_to_save:
-            entries_to_save.append(entry)
-
-        # Write all entries back to CSV
-        self._write_entries_to_csv(entries_to_save)
-
-        return True
-
-    def delete_entry(self, entry_id):
-        """Delete an entry from the CSV file.
-
-        This method should NOT modify self.dataset._entries directly.
-        Dataset.pop() handles that separately.
-        """
-        # Create a copy of entries to modify, excluding the one to delete
-        entries_to_save = []
-        for e in self.dataset._entries:
-            if not (hasattr(e, "_row_id") and e._row_id == entry_id):
-                entries_to_save.append(e)
-
-        # Write all entries back to CSV
-        self._write_entries_to_csv(entries_to_save)
-
-        return True
-
-    def _write_entries_to_csv(self, entries):
-        """Write all entries to the CSV file."""
-        csv_path = self._get_csv_path()
-
-        if not entries:
-            # If no entries, just create an empty CSV with headers
-            field_names = ["_row_id"] + list(self.dataset.model.model_fields.keys())
-            with open(csv_path, "w", newline="") as f:
-                writer = csv.DictWriter(f, fieldnames=field_names)
-                writer.writeheader()
-            return
-
-        # Get field names including _row_id
-        field_names = ["_row_id"] + list(entries[0].__class__.model_fields.keys())
-
-        # Write all entries
-        with open(csv_path, "w", newline="") as f:
-            writer = csv.DictWriter(f, fieldnames=field_names)
-            writer.writeheader()
-
-            for entry in entries:
-                # Create a dict with model data + row_id
-                entry_dict = entry.model_dump()
-                entry_dict["_row_id"] = getattr(entry, "_row_id", str(uuid.uuid4()))
-
-                writer.writerow(entry_dict)
-
-    def get_entry_by_field(self, field_name, field_value, model_class):
-        """Get an entry by field value."""
-        entries = self.load_entries(model_class)
-
-        for entry in entries:
-            if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:
-                return entry
-
-        return None
-
-# %% ../nbs/api/dataset.ipynb 6
-def create_dataset_backend(backend_type: SUPPORTED_BACKENDS, **kwargs):
-    """Factory function to create the appropriate backend.
-
-    Args:
-        backend_type: The type of backend to create (ragas_app or local)
-        **kwargs: Arguments specific to the backend
-
-    Returns:
-        DatasetBackend: An instance of the requested backend
-    """
-    backend_classes = {
-        "ragas_app": RagasAppBackend,
-        "local": LocalBackend,
-    }
-
-    if backend_type not in backend_classes:
-        raise ValueError(f"Unsupported backend: {backend_type}")
-
-    return backend_classes[backend_type](**kwargs)
-
-# %% ../nbs/api/dataset.ipynb 8
 class Dataset(t.Generic[BaseModelType]):
     """A list-like interface for managing dataset entries with backend synchronization.
 
@@ -504,7 +41,7 @@ def __init__(
         dataset_id: str,
         datatable_type: t.Literal["datasets", "experiments"],
         ragas_api_client: t.Optional[RagasApiClient] = None,
-        backend: SUPPORTED_BACKENDS = "local",
+        backend: SUPPORTED_BACKENDS = "local/csv",
         local_root_dir: t.Optional[str] = None,
     ):
         """Initialize a Dataset with the specified backend.
@@ -514,8 +51,9 @@ def __init__(
             model: The Pydantic model class for entries
             project_id: The ID of the parent project
             dataset_id: The ID of this dataset
-            ragas_api_client: Required for ragas_app backend
-            backend: The storage backend to use (ragas_app or local)
+            datatable_type: Whether this is for "datasets" or "experiments"
+            ragas_api_client: Required for ragas/app backend
+            backend: The storage backend to use (ragas/app or local/csv)
             local_root_dir: Required for local backend
         """
         # Store basic properties
@@ -527,28 +65,42 @@ def __init__(
         self.datatable_type = datatable_type
         self._entries: t.List[BaseModelType] = []
 
-        # Create the appropriate backend
-        backend_params = {}
-        if backend == "ragas_app":
+        # Create the appropriate backend using the project backend system
+        if backend == "ragas/app":
             if ragas_api_client is None:
-                raise ValueError("ragas_api_client is required for ragas_app backend")
-            backend_params = {
-                "ragas_api_client": ragas_api_client,
-                "project_id": project_id,
-                "dataset_id": dataset_id,
-            }
-        elif backend == "local":
+                raise ValueError("ragas_api_client is required for ragas/app backend")
+
+            # Create a platform project backend and get dataset backend from it
+            project_backend = PlatformProjectBackend(ragas_api_client)
+            project_backend.initialize(project_id)
+
+            if datatable_type == "datasets":
+                self._backend = project_backend.get_dataset_backend(
+                    dataset_id, name, model
+                )
+            else:  # experiments
+                self._backend = project_backend.get_experiment_backend(
+                    dataset_id, name, model
+                )
+
+        elif backend == "local/csv":
             if local_root_dir is None:
-                raise ValueError("local_root_dir is required for local backend")
-            backend_params = {
-                "local_root_dir": local_root_dir,
-                "project_id": project_id,
-                "dataset_id": dataset_id,
-                "dataset_name": name,
-                "type": self.datatable_type,
-            }
-
-        self._backend = create_dataset_backend(backend, **backend_params)
+                raise ValueError("local_root_dir is required for local/csv backend")
+
+            # Create a local CSV project backend and get dataset backend from it
+            project_backend = LocalCSVProjectBackend(local_root_dir)
+            project_backend.initialize(project_id)
+
+            if datatable_type == "datasets":
+                self._backend = project_backend.get_dataset_backend(
+                    dataset_id, name, model
+                )
+            else:  # experiments
+                self._backend = project_backend.get_experiment_backend(
+                    dataset_id, name, model
+                )
+        else:
+            raise ValueError(f"Unsupported backend: {backend}")
 
         # Initialize the backend with this dataset
         self._backend.initialize(self)
@@ -571,7 +123,7 @@ def __getitem__(
         if isinstance(key, slice):
             # Create a shallow copy of the dataset
             new_dataset = object.__new__(type(self))
-            
+
             # Copy all attributes
             new_dataset.name = self.name
             new_dataset.model = self.model
@@ -579,13 +131,13 @@ def __getitem__(
             new_dataset.dataset_id = self.dataset_id
             new_dataset.backend_type = self.backend_type
             new_dataset.datatable_type = self.datatable_type
-            
+
             # Share the same backend reference
             new_dataset._backend = self._backend
-            
+
             # Set the entries to the sliced entries
             new_dataset._entries = self._entries[key]
-            
+
             return new_dataset
         else:
             return self._entries[key]
@@ -620,185 +172,165 @@ def __iter__(self) -> t.Iterator[BaseModelType]:
         """Iterate over the entries in the dataset."""
         return iter(self._entries)
 
-# %% ../nbs/api/dataset.ipynb 16
-@patch
-def append(self: Dataset, entry: BaseModelType) -> None:
-    """Add a new entry to the dataset and sync to backend.
+    def append(self, entry: BaseModelType) -> None:
+        """Add a new entry to the dataset and sync to backend.
 
-    Args:
-        entry: The entry to add to the dataset
-    """
-    if not isinstance(entry, self.model):
-        raise TypeError(f"Entry must be an instance of {self.model.__name__}")
+        Args:
+            entry: The entry to add to the dataset
+        """
+        if not isinstance(entry, self.model):
+            raise TypeError(f"Entry must be an instance of {self.model.__name__}")
+
+        # Add to backend and get ID
+        row_id = self._backend.append_entry(entry)
 
-    # Add to backend and get ID
-    row_id = self._backend.append_entry(entry)
+        # Store the ID
+        entry._row_id = row_id
 
-    # Store the ID
-    entry._row_id = row_id
+        # Add to local cache
+        self._entries.append(entry)
 
-    # Add to local cache
-    self._entries.append(entry)
+    def pop(self, index: int = -1) -> BaseModelType:
+        """Remove and return entry at index, sync deletion to backend.
 
-# %% ../nbs/api/dataset.ipynb 20
-@patch
-def pop(self: Dataset, index: int = -1) -> BaseModelType:
-    """Remove and return entry at index, sync deletion to backend.
+        Args:
+            index: The index of the entry to remove (default: -1, the last entry)
 
-    Args:
-        index: The index of the entry to remove (default: -1, the last entry)
+        Returns:
+            The removed entry
+        """
+        # Get the entry
+        entry = self._entries[index]
 
-    Returns:
-        The removed entry
-    """
-    # Get the entry
-    entry = self._entries[index]
-
-    # Get the row ID
-    row_id = getattr(entry, "_row_id", None)
-    if row_id is None:
-        raise ValueError(
-            "Entry has no row ID. This likely means it was not added or synced to the dataset."
-        )
+        # Get the row ID
+        row_id = getattr(entry, "_row_id", None)
+        if row_id is None:
+            raise ValueError(
+                "Entry has no row ID. This likely means it was not added or synced to the dataset."
+            )
 
-    # Delete from backend
-    self._backend.delete_entry(row_id)
+        # Delete from backend
+        self._backend.delete_entry(row_id)
 
-    # Remove from local cache
-    return self._entries.pop(index)
+        # Remove from local cache
+        return self._entries.pop(index)
 
-# %% ../nbs/api/dataset.ipynb 24
-@patch
-def load(self: Dataset) -> None:
-    """Load all entries from the backend."""
-    # Get entries from backend
-    self._entries = self._backend.load_entries(self.model)
+    def load(self) -> None:
+        """Load all entries from the backend."""
+        # Get entries from backend
+        self._entries = self._backend.load_entries(self.model)
 
-# %% ../nbs/api/dataset.ipynb 26
-@patch
-def load_as_dicts(self: Dataset) -> t.List[t.Dict]:
-    """Load all entries as dictionaries.
+    def load_as_dicts(self) -> t.List[t.Dict]:
+        """Load all entries as dictionaries.
 
-    Returns:
-        List of dictionaries representing the entries
-    """
-    # Make sure we have entries
-    if not self._entries:
-        self.load()
+        Returns:
+            List of dictionaries representing the entries
+        """
+        # Make sure we have entries
+        if not self._entries:
+            self.load()
 
-    # Convert to dictionaries
-    return [entry.model_dump() for entry in self._entries]
+        # Convert to dictionaries
+        return [entry.model_dump() for entry in self._entries]
 
-# %% ../nbs/api/dataset.ipynb 29
-@patch
-def to_pandas(self: Dataset) -> "pd.DataFrame":
-    """Convert dataset to pandas DataFrame."""
+    def to_pandas(self) -> "pd.DataFrame":
+        """Convert dataset to pandas DataFrame.
 
-    # Make sure we have data
-    if not self._entries:
-        self.load()
+        Returns:
+            pd.DataFrame: A DataFrame containing all entries
 
-    # Convert entries to dictionaries
-    data = [entry.model_dump() for entry in self._entries]
-    return pd.DataFrame(data)
+        Raises:
+            ImportError: If pandas is not installed
+        """
+        if pd is None:
+            raise ImportError(
+                "pandas is required for to_pandas(). Install with: pip install pandas "
+                "or pip install ragas_experimental[all]"
+            )
 
-# %% ../nbs/api/dataset.ipynb 31
-@patch
-def save(self: Dataset, item: BaseModelType) -> None:
-    """Save changes to an item to the backend.
+        # Make sure we have data
+        if not self._entries:
+            self.load()
 
-    Args:
-        item: The item to save
-    """
-    if not isinstance(item, self.model):
-        raise TypeError(f"Item must be an instance of {self.model.__name__}")
+        # Convert entries to dictionaries
+        data = [entry.model_dump() for entry in self._entries]
+        return pd.DataFrame(data)
 
-    # Check if the item has a row ID
-    if not hasattr(item, "_row_id") or not item._row_id:
-        # Try to find it in our entries by matching
-        for i, entry in enumerate(self._entries):
-            if id(entry) == id(item):  # Check if it's the same object
-                if hasattr(entry, "_row_id") and entry._row_id:
-                    item._row_id = entry._row_id
-                    break
-
-    if not hasattr(item, "_row_id") or not item._row_id:
-        raise ValueError(
-            "Cannot save: item is not from this dataset or was not properly synced"
-        )
+    def save(self, item: BaseModelType) -> None:
+        """Save changes to an item to the backend.
 
-    # Update in backend
-    self._backend.update_entry(item)
+        Args:
+            item: The item to save
+        """
+        if not isinstance(item, self.model):
+            raise TypeError(f"Item must be an instance of {self.model.__name__}")
+
+        # Check if the item has a row ID
+        if not hasattr(item, "_row_id") or not item._row_id:
+            # Try to find it in our entries by matching
+            for i, entry in enumerate(self._entries):
+                if id(entry) == id(item):  # Check if it's the same object
+                    if hasattr(entry, "_row_id") and entry._row_id:
+                        item._row_id = entry._row_id
+                        break
+
+        if not hasattr(item, "_row_id") or not item._row_id:
+            raise ValueError(
+                "Cannot save: item is not from this dataset or was not properly synced"
+            )
 
-    # Update in local cache if needed
-    self._update_local_entry(item)
+        # Update in backend
+        self._backend.update_entry(item)
 
+        # Update in local cache if needed
+        self._update_local_entry(item)
 
-@patch
-def _update_local_entry(self: Dataset, item: BaseModelType) -> None:
-    """Update an entry in the local cache.
+    def _update_local_entry(self, item: BaseModelType) -> None:
+        """Update an entry in the local cache.
 
-    Args:
-        item: The item to update
-    """
-    for i, entry in enumerate(self._entries):
-        if (
-            hasattr(entry, "_row_id")
-            and hasattr(item, "_row_id")
-            and entry._row_id == item._row_id
-        ):
-            # If it's not the same object, update our copy
-            if id(entry) != id(item):
-                self._entries[i] = item
-            break
-
-# %% ../nbs/api/dataset.ipynb 35
-@patch
-def get(
-    self: Dataset, field_value: t.Any, field_name: str = "_row_id"
-) -> t.Optional[BaseModelType]:
-    """Get an entry by field value.
-
-    Args:
-        field_value: The value to match
-        field_name: The field to match against (default: "_row_id")
-
-    Returns:
-        The matching model instance or None if not found
-    """
-    # Check if we need to load entries
-    if not self._entries:
-        self.load()
-
-    # Search in local entries first
-    for entry in self._entries:
-        if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:
-            return entry
-
-    # If not found, try to get from backend
-    if field_name == "_row_id":
-        # Special case for row IDs
-        for entry in self._entries:
-            if hasattr(entry, "_row_id") and entry._row_id == field_value:
-                return entry
-    else:
-        # Use backend to search
-        return self._backend.get_entry_by_field(field_name, field_value, self.model)
+        Args:
+            item: The item to update
+        """
+        for i, entry in enumerate(self._entries):
+            if (
+                hasattr(entry, "_row_id")
+                and hasattr(item, "_row_id")
+                and entry._row_id == item._row_id
+            ):
+                # If it's not the same object, update our copy
+                if id(entry) != id(item):
+                    self._entries[i] = item
+                break
 
-    return None
+    def get(
+        self, field_value: t.Any, field_name: str = "_row_id"
+    ) -> t.Optional[BaseModelType]:
+        """Get an entry by field value.
 
-# %% ../nbs/api/dataset.ipynb 38
-@patch
-def to_pandas(self: Dataset) -> "pd.DataFrame":
-    """Convert dataset to pandas DataFrame.
+        Args:
+            field_value: The value to match
+            field_name: The field to match against (default: "_row_id")
 
-    Returns:
-        pd.DataFrame: A DataFrame containing all entries
-    """
-    # Make sure we have data
-    if not self._entries:
-        self.load()
+        Returns:
+            The matching model instance or None if not found
+        """
+        # Check if we need to load entries
+        if not self._entries:
+            self.load()
+
+        # Search in local entries first
+        for entry in self._entries:
+            if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:
+                return entry
+
+        # If not found, try to get from backend
+        if field_name == "_row_id":
+            # Special case for row IDs
+            for entry in self._entries:
+                if hasattr(entry, "_row_id") and entry._row_id == field_value:
+                    return entry
+        else:
+            # Use backend to search
+            return self._backend.get_entry_by_field(field_name, field_value, self.model)
 
-    # Convert entries to dictionaries
-    data = [entry.model_dump() for entry in self._entries]
-    return pd.DataFrame(data)
+        return None
diff --git a/experimental/ragas_experimental/embedding/__init__.py b/experimental/ragas_experimental/embedding/__init__.py
index 4df571636..fec2e0cb7 100644
--- a/experimental/ragas_experimental/embedding/__init__.py
+++ b/experimental/ragas_experimental/embedding/__init__.py
@@ -1,4 +1,3 @@
-from ragas_experimental.embedding.base import BaseEmbedding
-from ragas_experimental.embedding.base import ragas_embedding
+from ragas_experimental.embedding.base import BaseEmbedding, ragas_embedding
 
-__all__ = ['ragas_embedding','BaseEmbedding']
\ No newline at end of file
+__all__ = ["ragas_embedding", "BaseEmbedding"]
diff --git a/experimental/ragas_experimental/embedding/base.py b/experimental/ragas_experimental/embedding/base.py
index aec02fc45..3910a733d 100644
--- a/experimental/ragas_experimental/embedding/base.py
+++ b/experimental/ragas_experimental/embedding/base.py
@@ -1,16 +1,11 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/embedding/base.ipynb.
+__all__ = ["BaseEmbedding", "OpenAIEmbeddings", "ragas_embedding"]
 
-# %% auto 0
-__all__ = ['BaseEmbedding', 'OpenAIEmbeddings', 'ragas_embedding']
-
-# %% ../../nbs/api/embedding/base.ipynb 2
 import typing as t
 from abc import ABC, abstractmethod
 
+
 # TODO: Add support for other providers like HuggingFace, Cohere, etc.
 # TODO: handle async calls properly and ensure that the client supports async if needed.
-
-
 class BaseEmbedding(ABC):
     @abstractmethod
     def embed_text(self, text: str, **kwargs: t.Any) -> t.List[float]:
diff --git a/experimental/ragas_experimental/exceptions.py b/experimental/ragas_experimental/exceptions.py
index 69e0bcab2..f7ec46a6d 100644
--- a/experimental/ragas_experimental/exceptions.py
+++ b/experimental/ragas_experimental/exceptions.py
@@ -1,37 +1,45 @@
-"""All the exceptions specific to the `notion_annotator` project."""
+"""All the exceptions specific to the `ragas_experimental` project."""
+
+__all__ = [
+    "RagasError",
+    "ValidationError",
+    "DuplicateError",
+    "NotFoundError",
+    "ResourceNotFoundError",
+    "ProjectNotFoundError",
+    "DatasetNotFoundError",
+    "ExperimentNotFoundError",
+    "DuplicateResourceError",
+    "DuplicateProjectError",
+    "DuplicateDatasetError",
+    "DuplicateExperimentError",
+]
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/api/exceptions.ipynb.
 
-# %% auto 0
-__all__ = ['RagasError', 'ValidationError', 'DuplicateError', 'NotFoundError', 'ResourceNotFoundError', 'ProjectNotFoundError',
-           'DatasetNotFoundError', 'ExperimentNotFoundError', 'DuplicateResourceError', 'DuplicateProjectError',
-           'DuplicateDatasetError', 'DuplicateExperimentError']
-
-# %% ../nbs/api/exceptions.ipynb 2
 class RagasError(Exception):
     """Base class for all Ragas-related exceptions."""
 
     pass
 
-# %% ../nbs/api/exceptions.ipynb 3
-class ValidationError(Exception):
+
+class ValidationError(RagasError):
     """Raised when field validation fails."""
 
     pass
 
 
-class DuplicateError(Exception):
+class DuplicateError(RagasError):
     """Raised when multiple items are found but only one was expected."""
 
     pass
 
 
-class NotFoundError(Exception):
+class NotFoundError(RagasError):
     """Raised when an item is not found."""
 
     pass
 
-# %% ../nbs/api/exceptions.ipynb 4
+
 class ResourceNotFoundError(RagasError):
     """Exception raised when a requested resource doesn't exist."""
 
@@ -55,7 +63,7 @@ class ExperimentNotFoundError(ResourceNotFoundError):
 
     pass
 
-# %% ../nbs/api/exceptions.ipynb 5
+
 class DuplicateResourceError(RagasError):
     """Exception raised when multiple resources exist with the same identifier."""
 
diff --git a/experimental/ragas_experimental/experiment.py b/experimental/ragas_experimental/experiment.py
index c3e5d7688..7fc54eb21 100644
--- a/experimental/ragas_experimental/experiment.py
+++ b/experimental/ragas_experimental/experiment.py
@@ -1,22 +1,17 @@
 """Experiments hold the results of an experiment against a dataset."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/api/experiment.ipynb.
+__all__ = ["Experiment"]
 
-# %% auto 0
-__all__ = ['Experiment']
-
-# %% ../nbs/api/experiment.ipynb 2
 import typing as t
 
-from fastcore.utils import patch
-
 from ragas_experimental.model.pydantic_model import (
     ExtendedPydanticBaseModel as BaseModel,
 )
+
 from .backends.ragas_api_client import RagasApiClient
 from .dataset import Dataset
 
-# %% ../nbs/api/experiment.ipynb 3
+
 class Experiment(Dataset):
     def __init__(
         self,
@@ -25,7 +20,7 @@ def __init__(
         project_id: str,
         experiment_id: str,
         ragas_api_client: t.Optional[RagasApiClient] = None,
-        backend: t.Literal["ragas_app", "local"] = "ragas_app",
+        backend: t.Literal["ragas/app", "local/csv"] = "ragas/app",
         local_root_dir: t.Optional[str] = None,
     ):
         self.experiment_id = experiment_id
diff --git a/experimental/ragas_experimental/llm/__init__.py b/experimental/ragas_experimental/llm/__init__.py
index f3540b254..40c0b6717 100644
--- a/experimental/ragas_experimental/llm/__init__.py
+++ b/experimental/ragas_experimental/llm/__init__.py
@@ -1,3 +1,3 @@
 from ragas_experimental.llm.llm import RagasLLM, ragas_llm
 
-__all__ = ["RagasLLM", "ragas_llm"]
\ No newline at end of file
+__all__ = ["RagasLLM", "ragas_llm"]
diff --git a/experimental/ragas_experimental/llm/llm.py b/experimental/ragas_experimental/llm/llm.py
index 2f19a403a..711ff293c 100644
--- a/experimental/ragas_experimental/llm/llm.py
+++ b/experimental/ragas_experimental/llm/llm.py
@@ -1,15 +1,12 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/llm/llm.ipynb.
+__all__ = ["T", "RagasLLM", "ragas_llm"]
 
-# %% auto 0
-__all__ = ['T', 'RagasLLM', 'ragas_llm']
-
-# %% ../../nbs/api/llm/llm.ipynb 2
-import typing as t
 import asyncio
 import inspect
 import threading
-from pydantic import BaseModel
+import typing as t
+
 import instructor
+from pydantic import BaseModel
 
 T = t.TypeVar("T", bound=BaseModel)
 
diff --git a/experimental/ragas_experimental/metric/__init__.py b/experimental/ragas_experimental/metric/__init__.py
index 0675201ba..e7b35e306 100644
--- a/experimental/ragas_experimental/metric/__init__.py
+++ b/experimental/ragas_experimental/metric/__init__.py
@@ -1,12 +1,13 @@
-from ragas_experimental.metric.result import MetricResult
 from ragas_experimental.metric.base import Metric
 from ragas_experimental.metric.discrete import DiscreteMetric
 from ragas_experimental.metric.numeric import NumericMetric
 from ragas_experimental.metric.ranking import RankingMetric
+from ragas_experimental.metric.result import MetricResult
 
-__all__ = ['MetricResult',
-           'Metric',
-           'DiscreteMetric',
-           'NumericMetric',
-           'RankingMetric',
-           ]
+__all__ = [
+    "MetricResult",
+    "Metric",
+    "DiscreteMetric",
+    "NumericMetric",
+    "RankingMetric",
+]
diff --git a/experimental/ragas_experimental/metric/base.py b/experimental/ragas_experimental/metric/base.py
index 5d70ff388..c262fee98 100644
--- a/experimental/ragas_experimental/metric/base.py
+++ b/experimental/ragas_experimental/metric/base.py
@@ -1,31 +1,27 @@
 """base class for all type of metrics in ragas"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/metric/base.ipynb.
+__all__ = ["Metric"]
 
-# %% auto 0
-__all__ = ['Metric']
-
-# %% ../../nbs/api/metric/base.ipynb 2
-from abc import ABC
 import asyncio
+import string
+import typing as t
+from abc import ABC
 from dataclasses import dataclass, field
+
 from pydantic import BaseModel
-import typing as t
 from tqdm import tqdm
-import string
 
-
-from ..prompt.base import Prompt
 from ..embedding.base import BaseEmbedding
-from . import MetricResult
 from ..llm import RagasLLM
 from ..model.notion_model import NotionModel
+from ..prompt.base import Prompt
 from ..prompt.dynamic_few_shot import DynamicFewShotPrompt
+from .result import MetricResult
 
 if t.TYPE_CHECKING:
     from ragas_experimental.project.core import Project
 
-# %% ../../nbs/api/metric/base.ipynb 3
+
 @dataclass
 class Metric(ABC):
     """Base class for all metrics in the LLM evaluation library."""
@@ -38,7 +34,6 @@ def __post_init__(self):
         if isinstance(self.prompt, str):
             self.prompt = Prompt(self.prompt)
 
-
     def get_variables(self) -> t.List[str]:
         if isinstance(self.prompt, Prompt):
             fstr = self.prompt.instruction
@@ -52,43 +47,42 @@ def get_variables(self) -> t.List[str]:
         return vars
 
     def score(self, llm: RagasLLM, **kwargs) -> MetricResult:
-        
+
         traces = {}
         traces["input"] = kwargs
         prompt_input = self.prompt.format(**kwargs)
-        response = llm.generate(
-            prompt_input, response_model=self._response_model
-        )
+        response = llm.generate(prompt_input, response_model=self._response_model)
         traces["output"] = response.model_dump()
         result = MetricResult(**response.model_dump())
         result.traces = traces
         return result
 
-    async def ascore(
-        self, llm: RagasLLM, **kwargs
-    ) -> MetricResult:
-       
+    async def ascore(self, llm: RagasLLM, **kwargs) -> MetricResult:
+
         traces = {}
-        
+
         prompt_input = self.prompt.format(**kwargs)
         traces["input"] = prompt_input
         response = await llm.agenerate(
-            prompt_input, response_model=self._response_model,
+            prompt_input,
+            response_model=self._response_model,
         )
         traces["output"] = response.model_dump()
-        result = MetricResult(
-            **response.model_dump()
-        )  # Fixed missing parentheses
+        result = MetricResult(**response.model_dump())  # Fixed missing parentheses
         result.traces = traces
         return result
 
     def batch_score(
-        self, llm: RagasLLM, inputs: t.List[t.Dict[str, t.Any]],
-    ) -> t.List[t.Any]:
+        self,
+        llm: RagasLLM,
+        inputs: t.List[t.Dict[str, t.Any]],
+    ) -> t.List[MetricResult]:
         return [self.score(llm, **input_dict) for input_dict in inputs]
 
     async def abatch_score(
-        self, llm: RagasLLM, inputs: t.List[t.Dict[str, t.Any]],
+        self,
+        llm: RagasLLM,
+        inputs: t.List[t.Dict[str, t.Any]],
     ) -> t.List[MetricResult]:
         async_tasks = []
         for input_dict in inputs:
diff --git a/experimental/ragas_experimental/metric/decorator.py b/experimental/ragas_experimental/metric/decorator.py
index 7cb5d0b86..b275083ca 100644
--- a/experimental/ragas_experimental/metric/decorator.py
+++ b/experimental/ragas_experimental/metric/decorator.py
@@ -1,17 +1,14 @@
 """decorator factory for creating custom metrics"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/metric/decorator.ipynb.
+__all__ = ["create_metric_decorator"]
 
-# %% auto 0
-__all__ = ['create_metric_decorator']
-
-# %% ../../nbs/api/metric/decorator.ipynb 2
-import typing as t
-import inspect
 import asyncio
+import inspect
+import typing as t
 from dataclasses import dataclass
-from . import MetricResult
+
 from ..llm import RagasLLM
+from .result import MetricResult
 
 
 def create_metric_decorator(metric_class):
@@ -47,38 +44,38 @@ def decorator(func):
             # Get metric name and check if function is async
             metric_name = name or func.__name__
             is_async = inspect.iscoroutinefunction(func)
-            
+
             # Check function signature to determine if it expects llm/prompt
             sig = inspect.signature(func)
             param_names = list(sig.parameters.keys())
-            expects_llm = 'llm' in param_names
-            expects_prompt = 'prompt' in param_names
+            expects_llm = "llm" in param_names
+            expects_prompt = "prompt" in param_names
 
             # TODO: Move to dataclass type implementation
             @dataclass
             class CustomMetric(metric_class):
-                
+
                 def _validate_result_value(self, result_value):
                     """Validate result value based on metric type constraints."""
                     # Discrete metric validation
-                    if hasattr(self, 'values') and result_value not in self.values:
+                    if hasattr(self, "values") and result_value not in self.values:
                         return f"Metric {self.name} returned '{result_value}' but expected one of {self.values}"
-                    
+
                     # Numeric metric validation
-                    if hasattr(self, 'range'):
+                    if hasattr(self, "range"):
                         if not isinstance(result_value, (int, float)):
                             return f"Metric {self.name} returned '{result_value}' but expected a numeric value"
                         min_val, max_val = self.range
                         if not (min_val <= result_value <= max_val):
                             return f"Metric {self.name} returned {result_value} but expected value in range {self.range}"
-                    
+
                     # Ranking metric validation
-                    if hasattr(self, 'num_ranks'):
+                    if hasattr(self, "num_ranks"):
                         if not isinstance(result_value, list):
                             return f"Metric {self.name} returned '{result_value}' but expected a list"
                         if len(result_value) != self.num_ranks:
                             return f"Metric {self.name} returned list of length {len(result_value)} but expected {self.num_ranks} items"
-                    
+
                     return None  # No validation error
 
                 def _run_sync_in_async(self, func, *args, **kwargs):
@@ -92,12 +89,12 @@ def _execute_metric(self, llm, is_async_execution, **kwargs):
                         # Prepare function arguments based on what the function expects
                         func_kwargs = kwargs.copy()
                         func_args = []
-                        
+
                         if expects_llm:
                             func_args.append(llm)
                         if expects_prompt:
                             func_args.append(self.prompt)
-                        
+
                         if is_async:
                             # Async function implementation
                             if is_async_execution:
@@ -119,13 +116,15 @@ def _execute_metric(self, llm, is_async_execution, **kwargs):
 
                         # Ensure result is a MetricResult
                         if not isinstance(result, MetricResult):
-                            raise ValueError(f"Custom metric function must return MetricResult, got {type(result)}")
-                        
+                            raise ValueError(
+                                f"Custom metric function must return MetricResult, got {type(result)}"
+                            )
+
                         # Validate the result based on metric type
                         validation_error = self._validate_result_value(result.result)
                         if validation_error:
                             return MetricResult(result=None, reason=validation_error)
-                        
+
                         return result
 
                     except Exception as e:
@@ -135,21 +134,19 @@ def _execute_metric(self, llm, is_async_execution, **kwargs):
 
                 def score(self, llm: t.Optional[RagasLLM] = None, **kwargs):
                     """Synchronous scoring method."""
-                    return self._execute_metric(
-                        llm, is_async_execution=False, **kwargs
-                    )
+                    return self._execute_metric(llm, is_async_execution=False, **kwargs)
 
                 async def ascore(self, llm: t.Optional[RagasLLM] = None, **kwargs):
                     """Asynchronous scoring method."""
                     # Prepare function arguments based on what the function expects
                     func_kwargs = kwargs.copy()
                     func_args = []
-                    
+
                     if expects_llm:
                         func_args.append(llm)
                     if expects_prompt:
                         func_args.append(self.prompt)
-                    
+
                     if is_async:
                         # For async functions, await the result
                         result = await func(*func_args, **func_kwargs)
@@ -158,22 +155,22 @@ async def ascore(self, llm: t.Optional[RagasLLM] = None, **kwargs):
                         result = self._run_sync_in_async(
                             func, *func_args, **func_kwargs
                         )
-                    
+
                     # Ensure result is a MetricResult
                     if not isinstance(result, MetricResult):
-                        raise ValueError(f"Custom metric function must return MetricResult, got {type(result)}")
-                    
+                        raise ValueError(
+                            f"Custom metric function must return MetricResult, got {type(result)}"
+                        )
+
                     # Validate the result based on metric type
                     validation_error = self._validate_result_value(result.result)
                     if validation_error:
                         return MetricResult(result=None, reason=validation_error)
-                    
+
                     return result
 
             # Create the metric instance with all parameters
-            metric_instance = CustomMetric(
-                name=metric_name,**metric_params
-            )
+            metric_instance = CustomMetric(name=metric_name, **metric_params)
 
             # Preserve metadata
             metric_instance.__name__ = metric_name
diff --git a/experimental/ragas_experimental/metric/discrete.py b/experimental/ragas_experimental/metric/discrete.py
index 2abfb6053..432e392d7 100644
--- a/experimental/ragas_experimental/metric/discrete.py
+++ b/experimental/ragas_experimental/metric/discrete.py
@@ -1,14 +1,12 @@
 """Base class from which all discrete metrics should inherit."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/metric/discrete.ipynb.
+__all__ = ["discrete_metric", "DiscreteMetric"]
 
-# %% auto 0
-__all__ = ['discrete_metric', 'DiscreteMetric']
-
-# %% ../../nbs/api/metric/discrete.ipynb 2
 import typing as t
 from dataclasses import dataclass, field
+
 from pydantic import create_model
+
 from . import Metric
 from .decorator import create_metric_decorator
 
@@ -20,10 +18,9 @@ class DiscreteMetric(Metric):
     def __post_init__(self):
         super().__post_init__()
         values = tuple(self.values)
-        self._response_model = create_model("response_model", 
-                           result=(t.Literal[values], ...),
-                           reason=(str, ...))
-
+        self._response_model = create_model(
+            "response_model", result=(t.Literal[values], ...), reason=(str, ...)
+        )
 
 
 discrete_metric = create_metric_decorator(DiscreteMetric)
diff --git a/experimental/ragas_experimental/metric/numeric.py b/experimental/ragas_experimental/metric/numeric.py
index bd8dd697d..bf6bc0089 100644
--- a/experimental/ragas_experimental/metric/numeric.py
+++ b/experimental/ragas_experimental/metric/numeric.py
@@ -1,14 +1,12 @@
 """Base class for all numeric metrics"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/metric/numeric.ipynb.
+__all__ = ["numeric_metric", "NumericMetric"]
 
-# %% auto 0
-__all__ = ['numeric_metric', 'NumericMetric']
-
-# %% ../../nbs/api/metric/numeric.ipynb 2
 import typing as t
-from dataclasses import dataclass, field
+from dataclasses import dataclass
+
 from pydantic import create_model
+
 from . import Metric
 from .decorator import create_metric_decorator
 
@@ -22,5 +20,4 @@ def __post_init__(self):
         self._response_model = create_model("response_model", result=(float, ...))
 
 
-
 numeric_metric = create_metric_decorator(NumericMetric)
diff --git a/experimental/ragas_experimental/metric/ranking.py b/experimental/ragas_experimental/metric/ranking.py
index 853b179a1..e21fd2eb9 100644
--- a/experimental/ragas_experimental/metric/ranking.py
+++ b/experimental/ragas_experimental/metric/ranking.py
@@ -1,15 +1,12 @@
 """Base class for ranking metrics"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/metric/ranking.ipynb.
+__all__ = ["ranking_metric", "RankingMetric"]
 
-# %% auto 0
-__all__ = ['ranking_metric', 'RankingMetric']
-
-# %% ../../nbs/api/metric/ranking.ipynb 2
 import typing as t
-from dataclasses import dataclass, field
-from pydantic import Field
-from pydantic import create_model
+from dataclasses import dataclass
+
+from pydantic import Field, create_model
+
 from . import Metric
 from .decorator import create_metric_decorator
 
@@ -17,7 +14,7 @@
 @dataclass
 class RankingMetric(Metric):
     num_ranks: int = 2
-    
+
     def __post_init__(self):
         super().__post_init__()
         self._response_model = create_model(
diff --git a/experimental/ragas_experimental/metric/result.py b/experimental/ragas_experimental/metric/result.py
index 19e18aab6..049f8184e 100644
--- a/experimental/ragas_experimental/metric/result.py
+++ b/experimental/ragas_experimental/metric/result.py
@@ -1,16 +1,14 @@
 """MetricResult object to store the result of a metric"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/metric/result.ipynb.
+__all__ = ["MetricResult"]
 
-# %% auto 0
-__all__ = ['MetricResult']
-
-# %% ../../nbs/api/metric/result.ipynb 2
 import typing as t
 
 from fastcore.utils import patch
+from pydantic import GetCoreSchemaHandler, ValidationInfo
+from pydantic_core import core_schema
+
 
-# %% ../../nbs/api/metric/result.ipynb 3
 class MetricResult:
     """Class to hold the result of a metric evaluation.
 
@@ -184,11 +182,7 @@ def to_dict(self):
         """Convert the result to a dictionary."""
         return {"result": self._result, "reason": self.reason}
 
-# %% ../../nbs/api/metric/result.ipynb 7
-from pydantic_core import core_schema
-from pydantic import GetCoreSchemaHandler, ValidationInfo
 
-# %% ../../nbs/api/metric/result.ipynb 8
 @patch(cls_method=True)
 def validate(cls: MetricResult, value: t.Any, info: ValidationInfo):
     """Provide compatibility with older Pydantic versions."""
diff --git a/experimental/ragas_experimental/model/notion_model.py b/experimental/ragas_experimental/model/notion_model.py
index 9ffa9f616..4868783be 100644
--- a/experimental/ragas_experimental/model/notion_model.py
+++ b/experimental/ragas_experimental/model/notion_model.py
@@ -1,21 +1,16 @@
 """NotionModel is a class that allows you to create a model of a Notion database."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/model/notion_model.ipynb.
+__all__ = ["NotionModelMeta", "NotionModel"]
 
-# %% auto 0
-__all__ = ['NotionModelMeta', 'NotionModel']
-
-# %% ../../nbs/model/notion_model.ipynb 3
-from dataclasses import dataclass
 import typing as t
 from datetime import datetime
 
 from fastcore.utils import patch, patch_to
 
 from ..exceptions import ValidationError
-from .notion_typing import Field, ID
+from .notion_typing import ID, Field
+
 
-# %% ../../nbs/model/notion_model.ipynb 4
 class NotionModelMeta(type):
     """Metaclass for NotionModel to handle field registration."""
 
@@ -47,7 +42,7 @@ def __new__(mcs, name: str, bases: tuple, namespace: dict):
         namespace["_fields"] = _fields
         return super().__new__(mcs, name, bases, namespace)
 
-# %% ../../nbs/model/notion_model.ipynb 5
+
 class NotionModel(metaclass=NotionModelMeta):
     """Base class for Notion database models.
 
@@ -126,7 +121,7 @@ def __repr__(self) -> str:
 
         return f"{class_name}({' '.join(parts)})"
 
-# %% ../../nbs/model/notion_model.ipynb 8
+
 @patch
 def to_notion(self: NotionModel) -> dict:
     """Convert the model to Notion API format."""
diff --git a/experimental/ragas_experimental/model/notion_typing.py b/experimental/ragas_experimental/model/notion_typing.py
index 959298e85..03bf5cee1 100644
--- a/experimental/ragas_experimental/model/notion_typing.py
+++ b/experimental/ragas_experimental/model/notion_typing.py
@@ -1,19 +1,25 @@
 """Represents the types of Notion objects like text, number, select, multi-select, etc."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/model/notion_types.ipynb.
+__all__ = [
+    "T",
+    "Field",
+    "ID",
+    "Text",
+    "Title",
+    "Select",
+    "MultiSelect",
+    "URL",
+    "NotionFieldMeta",
+    "TextNew",
+]
 
-# %% auto 0
-__all__ = ['T', 'Field', 'ID', 'Text', 'Title', 'Select', 'MultiSelect', 'URL', 'NotionFieldMeta', 'TextNew']
-
-# %% ../../nbs/model/notion_types.ipynb 2
 import typing as t
-from datetime import datetime
 
 from ..exceptions import ValidationError
 
 T = t.TypeVar("T")
 
-# %% ../../nbs/model/notion_types.ipynb 3
+
 class Field(t.Generic[T]):
     """Base class for all Notion field types."""
 
@@ -63,7 +69,7 @@ def _to_notion_property(self) -> dict:
         """Convert field to Notion property definition format."""
         return {self.name: {"type": self.NOTION_FIELD_TYPE, self.NOTION_FIELD_TYPE: {}}}
 
-# %% ../../nbs/model/notion_types.ipynb 6
+
 class ID(Field[int], int):
     """System ID field type for integer IDs."""
 
@@ -103,7 +109,7 @@ def _from_notion(self, data: dict) -> t.Optional[int]:
     def _to_notion_property(self) -> dict:
         return {self.name: {"type": "unique_id", "unique_id": {"prefix": None}}}
 
-# %% ../../nbs/model/notion_types.ipynb 10
+
 class Text(Field[str], str):
     """Rich text property type."""
 
@@ -143,7 +149,7 @@ def _from_notion(self, data: dict) -> t.Optional[str]:
         # Combine all text chunks into a single string
         return "".join(item["text"]["content"] for item in rich_text if "text" in item)
 
-# %% ../../nbs/model/notion_types.ipynb 15
+
 class Title(Field[str], str):
     """Title property type."""
 
@@ -168,7 +174,7 @@ def _from_notion(self, data: dict) -> t.Optional[str]:
             return None
         return title[0]["text"]["content"]
 
-# %% ../../nbs/model/notion_types.ipynb 16
+
 class Select(Field[str], str):
     """Select property type."""
 
@@ -212,7 +218,7 @@ def _to_notion_property(self) -> dict:
             ]
         return prop
 
-# %% ../../nbs/model/notion_types.ipynb 17
+
 class MultiSelect(Field[list[str]], list):
     """Multi-select property type."""
 
@@ -258,7 +264,7 @@ def _to_notion_property(self) -> dict:
             ]
         return prop
 
-# %% ../../nbs/model/notion_types.ipynb 18
+
 class URL(Field[str], str):
     """URL property type."""
 
@@ -287,7 +293,7 @@ def _from_notion(self, data: dict) -> t.Optional[str]:
             url = data[self.name][self.NOTION_FIELD_TYPE]
         return url
 
-# %% ../../nbs/model/notion_types.ipynb 20
+
 T = t.TypeVar("T")
 
 
@@ -322,7 +328,7 @@ def to_notion_property(self) -> dict:
         """Convert field to Notion property definition."""
         return {self.name: {"type": self.NOTION_FIELD_TYPE, self.NOTION_FIELD_TYPE: {}}}
 
-# %% ../../nbs/model/notion_types.ipynb 21
+
 class TextNew(NotionFieldMeta):
     """Rich text property type for Notion."""
 
diff --git a/experimental/ragas_experimental/model/pydantic_model.py b/experimental/ragas_experimental/model/pydantic_model.py
index 4af436123..664d36936 100644
--- a/experimental/ragas_experimental/model/pydantic_model.py
+++ b/experimental/ragas_experimental/model/pydantic_model.py
@@ -1,18 +1,14 @@
 """An Extended version of Pydantics `BaseModel` for some ragas specific stuff"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/model/pydantic_mode.ipynb.
+__all__ = ["ExtendedPydanticBaseModel"]
 
-# %% auto 0
-__all__ = ['ExtendedPydanticBaseModel']
-
-# %% ../../nbs/api/model/pydantic_mode.ipynb 2
 import typing as t
 
 from pydantic import BaseModel, PrivateAttr
 
 from ..typing import FieldMeta as RagasFieldMeta
 
-# %% ../../nbs/api/model/pydantic_mode.ipynb 3
+
 class ExtendedPydanticBaseModel(BaseModel):
     """Extended Pydantic BaseModel with database integration capabilities"""
 
diff --git a/experimental/ragas_experimental/project.py b/experimental/ragas_experimental/project.py
deleted file mode 100644
index 10980c1cc..000000000
--- a/experimental/ragas_experimental/project.py
+++ /dev/null
@@ -1,121 +0,0 @@
-"""How to run experiments"""
-
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/project/02_experiments.ipynb.
-
-# %% auto 0
-__all__ = ['ExperimentProtocol']
-
-# %% ../nbs/project/02_experiments.ipynb 3
-from tqdm import tqdm
-from functools import wraps
-import asyncio
-
-# %% ../nbs/project/02_experiments.ipynb 4
-@patch
-def create_experiment(
-    self: Project, name: str, model: t.Type[NotionModel]
-) -> Experiment:
-    """Create a new experiment view.
-
-    Args:
-        name: Name of the experiment
-        model: Model class defining the experiment structure
-
-    Returns:
-        ExperimentView: View for managing experiment results
-    """
-    if self.experiments_page_id == "":
-        raise ValueError("Experiments page ID is not set")
-
-    # Collect all properties from model fields
-    properties = {}
-    for field_name, field in model._fields.items():
-        properties.update(field._to_notion_property())
-
-    # Create the database
-    database_id = self._notion_backend.create_new_database(
-        parent_page_id=self.experiments_page_id, title=name, properties=properties
-    )
-
-    return Experiment(
-        name=name,
-        model=model,
-        database_id=database_id,
-        notion_backend=self._notion_backend,
-    )
-
-# %% ../nbs/project/02_experiments.ipynb 5
-@patch
-def get_experiment(self: Project, name: str, model: t.Type[NotionModel]) -> Experiment:
-    """Get an existing experiment by name."""
-    if self.experiments_page_id == "":
-        raise ValueError("Experiments page ID is not set")
-
-    # Search for database with given name
-    database_id = self._notion_backend.get_database_id(
-        parent_page_id=self.experiments_page_id, name=name, return_multiple=False
-    )
-
-    return Experiment(
-        name=name,
-        model=model,
-        database_id=database_id,
-        notion_backend=self._notion_backend,
-    )
-
-# %% ../nbs/project/02_experiments.ipynb 6
-@t.runtime_checkable
-class ExperimentProtocol(t.Protocol):
-    async def __call__(self, *args, **kwargs): ...
-    async def run_async(self, name: str, dataset: Dataset): ...
-
-# %% ../nbs/project/02_experiments.ipynb 7
-# this one we have to clean up
-from langfuse.decorators import observe
-
-# %% ../nbs/project/02_experiments.ipynb 8
-@patch
-def experiment(self: Project, experiment_model: t.Type[NotionModel], name_prefix: str = ""):
-    """Decorator for creating experiment functions.
-
-    Args:
-        name_prefix: Optional prefix for experiment names
-
-    Returns:
-        Decorator function that wraps experiment functions
-    """
-
-    def decorator(func: t.Callable) -> ExperimentProtocol:
-        @wraps(func)
-        async def wrapped_experiment(*args, **kwargs):
-            # wrap the function with langfuse observation so that it can be traced
-            # and spans inside the function can be retrieved with sync_trace()
-            observed_func = observe(name=f"{name_prefix}-{func.__name__}")(func)
-
-            return await observed_func(*args, **kwargs)
-
-        # Add run method to the wrapped function
-        async def run_async(name: str, dataset: Dataset):
-            # Create tasks for all items
-            tasks = []
-            for item in dataset:
-                tasks.append(wrapped_experiment(item))
-
-            # Use as_completed with tqdm for progress tracking
-            results = []
-            for future in tqdm(asyncio.as_completed(tasks), total=len(tasks)):
-                result = await future
-                # Add each result to experiment view as it completes
-                results.append(result)
-
-            # upload results to experiment view
-            experiment_view = self.create_experiment(name=name, model=experiment_model)
-            for result in results:
-                experiment_view.append(result)
-
-            return experiment_view
-
-        wrapped_experiment.__setattr__("run_async", run_async)
-        return t.cast(ExperimentProtocol, wrapped_experiment)
-
-    return decorator
diff --git a/experimental/ragas_experimental/project/__init__.py b/experimental/ragas_experimental/project/__init__.py
index e69de29bb..483221e10 100644
--- a/experimental/ragas_experimental/project/__init__.py
+++ b/experimental/ragas_experimental/project/__init__.py
@@ -0,0 +1,76 @@
+"""Project management module for Ragas experimental framework.
+
+This module provides a clean interface for managing AI projects with support for
+multiple backend storage options including local CSV files and the Ragas app.
+"""
+
+from .backends import (
+    DatasetBackend,
+    ProjectBackend,
+    create_project_backend,
+    list_backends,
+    print_available_backends,
+    register_backend,
+)
+from .core import Project
+from .utils import MemorableNames, create_nano_id, memorable_names
+
+__all__ = [
+    "Project",
+    "create_project",
+    "get_project",
+    "MemorableNames",
+    "memorable_names",
+    "create_nano_id",
+    "ProjectBackend",
+    "DatasetBackend",
+    "create_project_backend",
+    "list_backends",
+    "print_available_backends",
+    "register_backend",
+]
+
+
+def create_project(
+    name: str, description: str = "", backend: str = "local/csv", **kwargs
+) -> Project:
+    """Create a new project with the specified backend.
+
+    Args:
+        name: Name of the project
+        description: Description of the project
+        backend: Backend type ("local/csv" or "ragas/app")
+        **kwargs: Additional backend-specific arguments
+
+    Returns:
+        Project: A new project instance
+
+    Examples:
+        >>> # Create a local project
+        >>> project = create_project("my_project", backend="local/csv", root_dir="/path/to/projects")
+
+        >>> # Create a ragas/app project
+        >>> project = create_project("my_project", backend="ragas/app", ragas_api_client=client)
+    """
+    return Project.create(name=name, description=description, backend=backend, **kwargs)
+
+
+def get_project(name: str, backend: str = "local/csv", **kwargs) -> Project:
+    """Get an existing project by name.
+
+    Args:
+        name: Name of the project to retrieve
+        backend: Backend type ("local/csv" or "ragas/app")
+        **kwargs: Additional backend-specific arguments
+
+    Returns:
+        Project: The existing project instance
+
+    Examples:
+        >>> # Get a local project
+        >>> project = get_project("my_project", backend="local/csv", root_dir="/path/to/projects")
+
+        >>> # Get a ragas/app project
+        >>> project = get_project("my_project", backend="ragas/app", ragas_api_client=client)
+    """
+    return Project.get(name=name, backend=backend, **kwargs)
diff --git a/experimental/ragas_experimental/project/backends/README.md b/experimental/ragas_experimental/project/backends/README.md
new file mode 100644
index 000000000..0b6b009f0
--- /dev/null
+++ b/experimental/ragas_experimental/project/backends/README.md
@@ -0,0 +1,519 @@
+# Backend Development Guide
+
+This guide shows you how to add new storage backends to the Ragas project system. The backend architecture supports multiple storage solutions like CSV files, databases, cloud platforms, and more.
+
+## Architecture Overview
+
+The backend system uses a two-layer architecture:
+
+1. **ProjectBackend**: Manages project-level operations (creating datasets/experiments, listing, etc.)
+2. **DatasetBackend**: Handles individual dataset operations (reading/writing entries, CRUD operations)
+
+```python
+# High-level flow
+Project -> ProjectBackend -> DatasetBackend -> Storage (CSV, DB, API, etc.)
+```
+
+### Plugin System
+
+Backends can be added in two ways:
+- **Internal backends**: Built into the main codebase
+- **External plugins**: Distributed as separate pip packages
+
+The system uses a registry pattern with automatic discovery via setuptools entry points.
+
+## Section 1: Adding Internal Backends
+
+Follow these steps to add a new backend to the main ragas_experimental codebase.
+
+### Step 1: Implement the Backend Classes
+
+Create a new file like `my_backend.py` in this directory:
+
+```python
+"""My custom backend implementation."""
+
+import typing as t
+from .base import ProjectBackend, DatasetBackend
+from ragas_experimental.model.pydantic_model import ExtendedPydanticBaseModel as BaseModel
+
+
+class MyDatasetBackend(DatasetBackend):
+    """Dataset backend for my storage system."""
+    
+    def __init__(self, connection_params: str, dataset_info: dict):
+        self.connection_params = connection_params
+        self.dataset_info = dataset_info
+        self.dataset = None
+    
+    def initialize(self, dataset):
+        """Initialize with dataset instance."""
+        self.dataset = dataset
+        # Setup storage connection, create tables/files, etc.
+    
+    def get_column_mapping(self, model):
+        """Map model fields to storage columns."""
+        # Return mapping between pydantic model fields and storage columns
+        return {field: field for field in model.__annotations__.keys()}
+    
+    def load_entries(self, model_class):
+        """Load all entries from storage."""
+        # Connect to your storage and return list of model instances
+        return []
+    
+    def append_entry(self, entry):
+        """Add new entry and return its ID."""
+        # Add entry to storage and return unique identifier
+        return "entry_id"
+    
+    def update_entry(self, entry):
+        """Update existing entry."""
+        # Update entry in storage based on entry._row_id
+        pass
+    
+    def delete_entry(self, entry_id):
+        """Delete entry by ID."""
+        # Remove entry from storage
+        pass
+    
+    def get_entry_by_field(self, field_name: str, field_value: t.Any, model_class):
+        """Find entry by field value."""
+        # Query storage and return matching entry or None
+        return None
+
+
+class MyProjectBackend(ProjectBackend):
+    """Project backend for my storage system."""
+    
+    def __init__(self, connection_string: str, **kwargs):
+        self.connection_string = connection_string
+        self.project_id = None
+        # Store any additional config from **kwargs
+    
+    def initialize(self, project_id: str, **kwargs):
+        """Initialize with project ID."""
+        self.project_id = project_id
+        # Setup project-level storage, create directories/schemas, etc.
+    
+    def create_dataset(self, name: str, model: t.Type[BaseModel]) -> str:
+        """Create new dataset and return ID."""
+        # Create dataset in your storage system
+        dataset_id = f"dataset_{name}"
+        return dataset_id
+    
+    def create_experiment(self, name: str, model: t.Type[BaseModel]) -> str:
+        """Create new experiment and return ID."""
+        # Create experiment in your storage system  
+        experiment_id = f"experiment_{name}"
+        return experiment_id
+    
+    def list_datasets(self) -> t.List[t.Dict]:
+        """List all datasets."""
+        # Query your storage and return list of dataset info
+        return [{"id": "dataset_1", "name": "example"}]
+    
+    def list_experiments(self) -> t.List[t.Dict]:
+        """List all experiments."""
+        # Query your storage and return list of experiment info
+        return [{"id": "experiment_1", "name": "example"}]
+    
+    def get_dataset_backend(self, dataset_id: str, name: str, model: t.Type[BaseModel]) -> DatasetBackend:
+        """Get DatasetBackend for specific dataset."""
+        return MyDatasetBackend(
+            connection_params=self.connection_string,
+            dataset_info={"id": dataset_id, "name": name}
+        )
+    
+    def get_experiment_backend(self, experiment_id: str, name: str, model: t.Type[BaseModel]) -> DatasetBackend:
+        """Get DatasetBackend for specific experiment."""
+        return MyDatasetBackend(
+            connection_params=self.connection_string,
+            dataset_info={"id": experiment_id, "name": name}
+        )
+    
+    def get_dataset_by_name(self, name: str, model: t.Type[BaseModel]) -> t.Tuple[str, DatasetBackend]:
+        """Get dataset ID and backend by name."""
+        # Query your storage to find dataset by name
+        dataset_id = f"found_{name}"
+        backend = self.get_dataset_backend(dataset_id, name, model)
+        return dataset_id, backend
+    
+    def get_experiment_by_name(self, name: str, model: t.Type[BaseModel]) -> t.Tuple[str, DatasetBackend]:
+        """Get experiment ID and backend by name."""
+        # Query your storage to find experiment by name
+        experiment_id = f"found_{name}"
+        backend = self.get_experiment_backend(experiment_id, name, model)
+        return experiment_id, backend
+```
+
+### Step 2: Register the Backend
+
+Update `registry.py` to include your backend in the built-in backends:
+
+```python
+# In _register_builtin_backends method
+def _register_builtin_backends(self) -> None:
+    """Register the built-in backends."""
+    try:
+        from .local_csv import LocalCSVProjectBackend
+        self.register_backend("local_csv", LocalCSVProjectBackend, aliases=["local"])
+        
+        from .platform import PlatformProjectBackend
+        self.register_backend("platform", PlatformProjectBackend, aliases=["ragas_app"])
+        
+        # Add your backend here
+        from .my_backend import MyProjectBackend
+        self.register_backend("my_storage", MyProjectBackend, aliases=["custom"])
+        
+    except ImportError as e:
+        logger.warning(f"Failed to import built-in backend: {e}")
+```
+
+### Step 3: Add Entry Point Configuration
+
+Update `experimental/pyproject.toml` to include your backend:
+
+```toml
+[project.entry-points."ragas.backends"]
+local_csv = "ragas_experimental.project.backends.local_csv:LocalCSVProjectBackend"
+platform = "ragas_experimental.project.backends.platform:PlatformProjectBackend"
+my_storage = "ragas_experimental.project.backends.my_backend:MyProjectBackend"
+```
+
+### Step 4: Update Exports
+
+Add your backend to `__init__.py`:
+
+```python
+# Import concrete backends for backward compatibility
+from .local_csv import LocalCSVProjectBackend
+from .platform import PlatformProjectBackend
+from .my_backend import MyProjectBackend  # Add this
+
+__all__ = [
+    "ProjectBackend",
+    "DatasetBackend",
+    # ... other exports ...
+    "MyProjectBackend",  # Add this
+]
+```
+
+### Step 5: Write Tests
+
+Create `test_my_backend.py`:
+
+```python
+"""Tests for my custom backend."""
+
+import pytest
+import tempfile
+from ragas_experimental.project.backends.my_backend import MyProjectBackend, MyDatasetBackend
+
+
+def test_my_backend_creation():
+    """Test backend can be created."""
+    backend = MyProjectBackend(connection_string="test://connection")
+    assert backend.connection_string == "test://connection"
+
+
+def test_my_backend_integration():
+    """Test backend works with project system."""
+    from ragas_experimental.project import create_project
+    
+    project = create_project(
+        name="test_project",
+        backend="my_storage",
+        connection_string="test://connection"
+    )
+    
+    assert project.name == "test_project"
+    # Add more integration tests...
+```
+
+## Section 2: Creating Pip-Installable Backend Plugins
+
+Create a separate Python package that provides a backend plugin.
+
+### Plugin Package Structure
+
+```
+ragas-sqlite-backend/
+├── pyproject.toml
+├── README.md
+├── src/
+│   └── ragas_sqlite_backend/
+│       ├── __init__.py
+│       ├── backend.py
+│       └── dataset.py
+└── tests/
+    └── test_sqlite_backend.py
+```
+
+### Step 1: Create the Plugin Package
+
+**pyproject.toml**:
+```toml
+[build-system]
+requires = ["setuptools>=64", "setuptools_scm>=8"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "ragas-sqlite-backend"
+version = "0.1.0"
+description = "SQLite backend for Ragas experimental projects"
+authors = [{name = "Your Name", email = "your.email@example.com"}]
+requires-python = ">=3.9"
+dependencies = [
+    "ragas_experimental",  # Depend on the main package
+    "sqlite3",  # If not in stdlib
+]
+
+# Define the entry point for backend discovery
+[project.entry-points."ragas.backends"]
+sqlite = "ragas_sqlite_backend.backend:SQLiteProjectBackend"
+
+[project.optional-dependencies]
+dev = ["pytest", "pytest-asyncio"]
+```
+
+**src/ragas_sqlite_backend/backend.py**:
+```python
+"""SQLite backend implementation."""
+
+import sqlite3
+import typing as t
+from pathlib import Path
+
+# Import from the main ragas_experimental package
+from ragas_experimental.project.backends.base import ProjectBackend, DatasetBackend
+from ragas_experimental.model.pydantic_model import ExtendedPydanticBaseModel as BaseModel
+
+
+class SQLiteDatasetBackend(DatasetBackend):
+    """SQLite implementation of DatasetBackend."""
+    
+    def __init__(self, db_path: str, table_name: str):
+        self.db_path = db_path
+        self.table_name = table_name
+        self.dataset = None
+    
+    def initialize(self, dataset):
+        """Initialize with dataset and create table."""
+        self.dataset = dataset
+        self._create_table_if_not_exists()
+    
+    def _create_table_if_not_exists(self):
+        """Create SQLite table based on model schema."""
+        with sqlite3.connect(self.db_path) as conn:
+            # Create table based on model fields
+            model_fields = self.dataset.model.__annotations__
+            
+            columns = ["_row_id TEXT PRIMARY KEY"]
+            for field_name, field_type in model_fields.items():
+                sql_type = self._python_to_sql_type(field_type)
+                columns.append(f"{field_name} {sql_type}")
+            
+            create_sql = f"CREATE TABLE IF NOT EXISTS {self.table_name} ({', '.join(columns)})"
+            conn.execute(create_sql)
+    
+    def _python_to_sql_type(self, python_type):
+        """Convert Python type to SQLite type."""
+        type_mapping = {
+            str: "TEXT",
+            int: "INTEGER", 
+            float: "REAL",
+            bool: "INTEGER",
+        }
+        return type_mapping.get(python_type, "TEXT")
+    
+    # Implement all other abstract methods...
+    def get_column_mapping(self, model):
+        return {field: field for field in model.__annotations__.keys()}
+    
+    def load_entries(self, model_class):
+        # Implement SQLite loading logic
+        return []
+    
+    def append_entry(self, entry):
+        # Implement SQLite insertion logic
+        return "new_entry_id"
+    
+    # ... implement other required methods
+
+
+class SQLiteProjectBackend(ProjectBackend):
+    """SQLite implementation of ProjectBackend."""
+    
+    def __init__(self, db_path: str = None, **kwargs):
+        self.db_path = db_path or "ragas_project.db"
+        self.project_id = None
+    
+    def initialize(self, project_id: str, **kwargs):
+        """Initialize SQLite database for project."""
+        self.project_id = project_id
+        
+        # Create database file and project metadata table
+        Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
+        
+        with sqlite3.connect(self.db_path) as conn:
+            # Create metadata tables
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS projects (
+                    id TEXT PRIMARY KEY,
+                    name TEXT,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+                )
+            """)
+            
+            conn.execute("""
+                CREATE TABLE IF NOT EXISTS datasets (
+                    id TEXT PRIMARY KEY,
+                    project_id TEXT,
+                    name TEXT,
+                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+                    FOREIGN KEY (project_id) REFERENCES projects (id)
+                )
+            """)
+            
+            # Insert project if not exists
+            conn.execute(
+                "INSERT OR IGNORE INTO projects (id, name) VALUES (?, ?)",
+                (project_id, project_id)
+            )
+    
+    # Implement all abstract methods...
+    def create_dataset(self, name: str, model: t.Type[BaseModel]) -> str:
+        # Implement dataset creation in SQLite
+        dataset_id = f"dataset_{name}_{self.project_id}"
+        
+        with sqlite3.connect(self.db_path) as conn:
+            conn.execute(
+                "INSERT INTO datasets (id, project_id, name) VALUES (?, ?, ?)",
+                (dataset_id, self.project_id, name)
+            )
+        
+        return dataset_id
+    
+    def get_dataset_backend(self, dataset_id: str, name: str, model: t.Type[BaseModel]) -> DatasetBackend:
+        """Return SQLite dataset backend."""
+        table_name = f"data_{dataset_id}"
+        return SQLiteDatasetBackend(self.db_path, table_name)
+    
+    # ... implement other required methods
+```
+
+**src/ragas_sqlite_backend/__init__.py**:
+```python
+"""SQLite backend plugin for Ragas experimental."""
+
+from .backend import SQLiteProjectBackend, SQLiteDatasetBackend
+
+__all__ = ["SQLiteProjectBackend", "SQLiteDatasetBackend"]
+```
+
+### Step 2: Publish the Plugin
+
+1. **Build the package**:
+   ```bash
+   pip install build
+   python -m build
+   ```
+
+2. **Upload to PyPI** (optional):
+   ```bash
+   pip install twine
+   twine upload dist/*
+   ```
+
+3. **Install and test**:
+   ```bash
+   pip install ragas-sqlite-backend
+   
+   # The backend should now be automatically discovered
+   python -c "from ragas_experimental.project import list_backends; print(list_backends())"
+   # Should include 'sqlite' in the output
+   ```
+
+### Step 3: Use the Plugin
+
+Once installed, users can use your backend:
+
+```python
+from ragas_experimental.project import create_project
+
+# Use your plugin backend
+project = create_project(
+    name="my_sqlite_project",
+    backend="sqlite",  # Your plugin's entry point name
+    db_path="/path/to/database.db"
+)
+
+# Backend works seamlessly with the rest of the system
+dataset = project.create_dataset("my_data", MyDataModel)
+dataset.add_entries([...])
+```
+
+## Best Practices
+
+### Error Handling
+- Use proper logging: `import logging; logger = logging.getLogger(__name__)`
+- Handle connection failures gracefully
+- Provide meaningful error messages
+
+### Performance
+- Implement connection pooling for database backends
+- Use batch operations when possible
+- Consider caching for frequently accessed data
+
+### Testing
+- Test both ProjectBackend and DatasetBackend separately
+- Include integration tests with the Project class
+- Test error conditions and edge cases
+- Use temporary storage for tests (tempfile, in-memory DBs)
+
+### Documentation
+- Document all configuration parameters
+- Provide usage examples
+- Include troubleshooting guides
+
+### Configuration
+- Accept configuration through constructor kwargs
+- Support environment variables for sensitive data
+- Provide sensible defaults
+
+## Common Patterns
+
+### Connection Management
+```python
+class MyBackend(ProjectBackend):
+    def __init__(self, connection_string: str, **kwargs):
+        self.connection_string = connection_string
+        self._connection = None
+    
+    def _get_connection(self):
+        """Lazy connection initialization."""
+        if self._connection is None:
+            self._connection = create_connection(self.connection_string)
+        return self._connection
+```
+
+### ID Generation
+```python
+from ragas_experimental.project.utils import create_nano_id
+
+def create_dataset(self, name: str, model):
+    dataset_id = create_nano_id()  # Creates unique short ID
+    # ... rest of implementation
+```
+
+### Model Validation
+```python
+def append_entry(self, entry):
+    # Validate entry is correct model type
+    if not isinstance(entry, self.dataset.model):
+        raise ValueError(f"Entry must be instance of {self.dataset.model}")
+    
+    # Add to storage...
+```
+
+For more examples, see the existing `local_csv.py` and `platform.py` implementations in this directory.
\ No newline at end of file
diff --git a/experimental/ragas_experimental/project/backends/__init__.py b/experimental/ragas_experimental/project/backends/__init__.py
new file mode 100644
index 000000000..9db0128ec
--- /dev/null
+++ b/experimental/ragas_experimental/project/backends/__init__.py
@@ -0,0 +1,33 @@
+"""Backend factory and exports for project backends."""
+
+from .base import DatasetBackend, ProjectBackend
+
+# Import concrete backends for backward compatibility
+from .local_csv import LocalCSVProjectBackend
+from .platform import PlatformProjectBackend
+from .registry import (
+    BackendRegistry,
+    create_project_backend,
+    get_backend_info,
+    get_registry,
+    list_backend_info,
+    list_backends,
+    print_available_backends,
+    register_backend,
+)
+
+__all__ = [
+    "ProjectBackend",
+    "DatasetBackend",
+    "BackendRegistry",
+    "get_registry",
+    "register_backend",
+    "list_backends",
+    "get_backend_info",
+    "list_backend_info",
+    "print_available_backends",
+    "create_project_backend",
+    # Concrete backends for backward compatibility
+    "LocalCSVProjectBackend",
+    "PlatformProjectBackend",
+]
diff --git a/experimental/ragas_experimental/project/backends/base.py b/experimental/ragas_experimental/project/backends/base.py
new file mode 100644
index 000000000..14b6018ce
--- /dev/null
+++ b/experimental/ragas_experimental/project/backends/base.py
@@ -0,0 +1,112 @@
+"""Base classes for project and dataset backends."""
+
+import typing as t
+from abc import ABC, abstractmethod
+
+from ragas_experimental.model.pydantic_model import (
+    ExtendedPydanticBaseModel as BaseModel,
+)
+
+
+class DatasetBackend(ABC):
+    """Abstract base class for dataset backends.
+
+    All dataset storage backends must implement these methods.
+    """
+
+    @abstractmethod
+    def initialize(self, dataset: t.Any) -> None:
+        """Initialize the backend with dataset information"""
+        pass
+
+    @abstractmethod
+    def get_column_mapping(self, model: t.Type[BaseModel]) -> t.Dict[str, str]:
+        """Get mapping between model fields and backend columns"""
+        pass
+
+    @abstractmethod
+    def load_entries(self, model_class) -> t.List[t.Any]:
+        """Load all entries from storage"""
+        pass
+
+    @abstractmethod
+    def append_entry(self, entry) -> str:
+        """Add a new entry to storage and return its ID"""
+        pass
+
+    @abstractmethod
+    def update_entry(self, entry) -> bool:
+        """Update an existing entry in storage"""
+        pass
+
+    @abstractmethod
+    def delete_entry(self, entry_id) -> bool:
+        """Delete an entry from storage"""
+        pass
+
+    @abstractmethod
+    def get_entry_by_field(
+        self, field_name: str, field_value: t.Any, model_class
+    ) -> t.Optional[t.Any]:
+        """Get an entry by field value"""
+        pass
+
+
+class ProjectBackend(ABC):
+    """Abstract base class for project backends.
+
+    Handles project-level operations like creating/listing datasets and experiments.
+    """
+
+    @abstractmethod
+    def initialize(self, project_id: str, **kwargs) -> None:
+        """Initialize the backend with project information"""
+        pass
+
+    @abstractmethod
+    def create_dataset(self, name: str, model: t.Type[BaseModel]) -> str:
+        """Create a new dataset and return its ID"""
+        pass
+
+    @abstractmethod
+    def create_experiment(self, name: str, model: t.Type[BaseModel]) -> str:
+        """Create a new experiment and return its ID"""
+        pass
+
+    @abstractmethod
+    def list_datasets(self) -> t.List[t.Dict]:
+        """List all datasets in the project"""
+        pass
+
+    @abstractmethod
+    def list_experiments(self) -> t.List[t.Dict]:
+        """List all experiments in the project"""
+        pass
+
+    @abstractmethod
+    def get_dataset_backend(
+        self, dataset_id: str, name: str, model: t.Type[BaseModel]
+    ) -> DatasetBackend:
+        """Get a DatasetBackend instance for a specific dataset"""
+        pass
+
+    @abstractmethod
+    def get_experiment_backend(
+        self, experiment_id: str, name: str, model: t.Type[BaseModel]
+    ) -> DatasetBackend:
+        """Get a DatasetBackend instance for a specific experiment"""
+        pass
+
+    @abstractmethod
+    def get_dataset_by_name(
+        self, name: str, model: t.Type[BaseModel]
+    ) -> t.Tuple[str, DatasetBackend]:
+        """Get dataset ID and backend by name. Returns (dataset_id, backend)"""
+        pass
+
+    @abstractmethod
+    def get_experiment_by_name(
+        self, name: str, model: t.Type[BaseModel]
+    ) -> t.Tuple[str, DatasetBackend]:
+        """Get experiment ID and backend by name. Returns (experiment_id, backend)"""
+        pass
diff --git a/experimental/ragas_experimental/project/backends/local_csv.py b/experimental/ragas_experimental/project/backends/local_csv.py
new file mode 100644
index 000000000..8ccf216f2
--- /dev/null
+++ b/experimental/ragas_experimental/project/backends/local_csv.py
@@ -0,0 +1,377 @@
+"""Local CSV backend implementation for projects and datasets."""
+
+import csv
+import os
+import typing as t
+import uuid
+
+from ragas_experimental.model.pydantic_model import (
+    ExtendedPydanticBaseModel as BaseModel,
+)
+
+from ..utils import create_nano_id
+from .base import DatasetBackend, ProjectBackend
+
+
+class LocalCSVDatasetBackend(DatasetBackend):
+    """Local CSV implementation of DatasetBackend."""
+
+    def __init__(
+        self,
+        local_root_dir: str,
+        project_id: str,
+        dataset_id: str,
+        dataset_name: str,
+        datatable_type: t.Literal["datasets", "experiments"],
+    ):
+        self.local_root_dir = local_root_dir
+        self.project_id = project_id
+        self.dataset_id = dataset_id
+        self.dataset_name = dataset_name
+        self.datatable_type = datatable_type
+        self.dataset = None
+
+    def initialize(self, dataset):
+        """Initialize the backend with the dataset instance."""
+        self.dataset = dataset
+        self._ensure_csv_exists()
+
+    def _ensure_csv_exists(self):
+        """Create the CSV file if it doesn't exist."""
+        csv_path = self._get_csv_path()
+
+        # Create directories if needed
+        os.makedirs(os.path.dirname(csv_path), exist_ok=True)
+
+        # Create file with headers if it doesn't exist
+        if not os.path.exists(csv_path):
+            # Include _row_id in the headers
+            if self.dataset is None:
+                raise ValueError(
+                    "Dataset must be initialized before creating CSV headers"
+                )
+            field_names = ["_row_id"] + list(self.dataset.model.__annotations__.keys())
+
+            with open(csv_path, "w", newline="") as f:
+                writer = csv.writer(f)
+                writer.writerow(field_names)
+
+    def _get_csv_path(self):
+        """Get the path to the CSV file."""
+        return os.path.join(
+            self.local_root_dir,
+            self.project_id,
+            self.datatable_type,
+            f"{self.dataset_name}.csv",
+        )
+
+    def get_column_mapping(self, model) -> t.Dict:
+        """Get mapping between model fields and CSV columns."""
+        return model.model_fields
+
+    def load_entries(self, model_class):
+        """Load all entries from the CSV file."""
+        csv_path = self._get_csv_path()
+
+        if not os.path.exists(csv_path):
+            return []
+
+        entries = []
+
+        with open(csv_path, "r", newline="") as f:
+            reader = csv.DictReader(f)
+
+            for row in reader:
+                try:
+                    # Extract row_id and remove from model data
+                    row_id = row.get("_row_id", str(uuid.uuid4()))
+
+                    # Create a copy without _row_id for model instantiation
+                    model_data = {k: v for k, v in row.items() if k != "_row_id"}
+
+                    # Convert types as needed
+                    typed_row = {}
+                    for field, value in model_data.items():
+                        if field in model_class.model_fields:
+                            field_type = model_class.model_fields[field].annotation
+
+                            # Handle basic type conversions
+                            if field_type is int:
+                                typed_row[field] = int(value) if value else 0
+                            elif field_type is float:
+                                typed_row[field] = float(value) if value else 0.0
+                            elif field_type is bool:
+                                typed_row[field] = value.lower() in (
+                                    "true",
+                                    "t",
+                                    "yes",
+                                    "y",
+                                    "1",
+                                )
+                            else:
+                                typed_row[field] = value
+
+                    # Create model instance
+                    entry = model_class(**typed_row)
+
+                    # Set the row ID from CSV
+                    entry._row_id = row_id
+
+                    entries.append(entry)
+                except Exception as e:
+                    print(f"Error loading row from CSV: {e}")
+
+        return entries
+
+    def append_entry(self, entry) -> str:
+        """Add a new entry to the CSV file and return a generated ID."""
+        csv_path = self._get_csv_path()
+
+        # Read existing rows to avoid overwriting
+        existing_rows = []
+        if os.path.exists(csv_path) and os.path.getsize(csv_path) > 0:
+            with open(csv_path, "r", newline="") as f:
+                reader = csv.DictReader(f)
+                existing_rows = list(reader)
+
+        # Generate a row ID if needed
+        row_id = getattr(entry, "_row_id", None) or str(uuid.uuid4())
+
+        # Get field names including row_id
+        field_names = ["_row_id"] + list(entry.model_fields.keys())
+
+        # Convert entry to dict
+        entry_dict = entry.model_dump()
+
+        # Add row_id to the dict
+        entry_dict["_row_id"] = row_id
+
+        # Write all rows back with the new entry
+        with open(csv_path, "w", newline="") as f:
+            writer = csv.DictWriter(f, fieldnames=field_names)
+            writer.writeheader()
+
+            # Write existing rows
+            for row in existing_rows:
+                writer.writerow(row)
+
+            # Write new row
+            writer.writerow(entry_dict)
+
+        # Return the row ID
+        return row_id
+
+    def update_entry(self, entry) -> bool:
+        """Update an existing entry in the CSV file."""
+        # Create a copy of entries to modify
+        if self.dataset is None:
+            raise ValueError("Dataset must be initialized")
+        entries_to_save = list(self.dataset._entries)  # Make a copy
+
+        # Find the entry to update
+        updated = False
+        for i, e in enumerate(entries_to_save):
+            if (
+                hasattr(e, "_row_id")
+                and hasattr(entry, "_row_id")
+                and e._row_id == entry._row_id
+            ):
+                # Update the entry in our copy
+                entries_to_save[i] = entry
+                updated = True
+                break
+
+        # If entry wasn't found, just append it
+        if not updated and entries_to_save:
+            entries_to_save.append(entry)
+
+        # Write all entries back to CSV
+        self._write_entries_to_csv(entries_to_save)
+
+        return True
+
+    def delete_entry(self, entry_id) -> bool:
+        """Delete an entry from the CSV file."""
+        # Create a copy of entries to modify, excluding the one to delete
+        if self.dataset is None:
+            raise ValueError("Dataset must be initialized")
+        entries_to_save = []
+        for e in self.dataset._entries:
+            if not (hasattr(e, "_row_id") and e._row_id == entry_id):
+                entries_to_save.append(e)
+
+        # Write all entries back to CSV
+        self._write_entries_to_csv(entries_to_save)
+
+        return True
+
+    def _write_entries_to_csv(self, entries):
+        """Write all entries to the CSV file."""
+        csv_path = self._get_csv_path()
+
+        if not entries:
+            # If no entries, just create an empty CSV with headers
+            if self.dataset is None:
+                raise ValueError("Dataset must be initialized")
+            field_names = ["_row_id"] + list(self.dataset.model.model_fields.keys())
+            with open(csv_path, "w", newline="") as f:
+                writer = csv.DictWriter(f, fieldnames=field_names)
+                writer.writeheader()
+            return
+
+        # Get field names including _row_id
+        field_names = ["_row_id"] + list(entries[0].__class__.model_fields.keys())
+
+        # Write all entries
+        with open(csv_path, "w", newline="") as f:
+            writer = csv.DictWriter(f, fieldnames=field_names)
+            writer.writeheader()
+
+            for entry in entries:
+                # Create a dict with model data + row_id
+                entry_dict = entry.model_dump()
+                entry_dict["_row_id"] = getattr(entry, "_row_id", str(uuid.uuid4()))
+
+                writer.writerow(entry_dict)
+
+    def get_entry_by_field(
+        self, field_name, field_value, model_class
+    ) -> t.Optional[t.Any]:
+        """Get an entry by field value."""
+        entries = self.load_entries(model_class)
+
+        for entry in entries:
+            if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:
+                return entry
+
+        return None
+
+
+class LocalCSVProjectBackend(ProjectBackend):
+    """Local CSV implementation of ProjectBackend."""
+
+    def __init__(self, root_dir: str):
+        self.root_dir = root_dir
+        self.project_id: t.Optional[str] = None
+
+    def initialize(self, project_id: str, **kwargs):
+        """Initialize the backend with project information."""
+        self.project_id = project_id
+        self._project_dir = os.path.join(self.root_dir, project_id)
+        self._create_project_structure()
+
+    def _create_project_structure(self):
+        """Create the local directory structure for the project."""
+        os.makedirs(self._project_dir, exist_ok=True)
+        # Create datasets directory
+        os.makedirs(os.path.join(self._project_dir, "datasets"), exist_ok=True)
+        # Create experiments directory
+        os.makedirs(os.path.join(self._project_dir, "experiments"), exist_ok=True)
+
+    def create_dataset(self, name: str, model: t.Type[BaseModel]) -> str:
+        """Create a new dataset and return its ID."""
+        dataset_id = create_nano_id()
+        return dataset_id
+
+    def create_experiment(self, name: str, model: t.Type[BaseModel]) -> str:
+        """Create a new experiment and return its ID."""
+        experiment_id = create_nano_id()
+        return experiment_id
+
+    def list_datasets(self) -> t.List[t.Dict]:
+        """List all datasets in the project."""
+        datasets_dir = os.path.join(self._project_dir, "datasets")
+        if not os.path.exists(datasets_dir):
+            return []
+
+        datasets = []
+        for filename in os.listdir(datasets_dir):
+            if filename.endswith(".csv"):
+                name = os.path.splitext(filename)[0]
+                datasets.append(
+                    {
+                        "id": create_nano_id(),  # Generate ID for consistency
+                        "name": name,
+                    }
+                )
+        return datasets
+
+    def list_experiments(self) -> t.List[t.Dict]:
+        """List all experiments in the project."""
+        experiments_dir = os.path.join(self._project_dir, "experiments")
+        if not os.path.exists(experiments_dir):
+            return []
+
+        experiments = []
+        for filename in os.listdir(experiments_dir):
+            if filename.endswith(".csv"):
+                name = os.path.splitext(filename)[0]
+                experiments.append(
+                    {
+                        "id": create_nano_id(),  # Generate ID for consistency
+                        "name": name,
+                    }
+                )
+        return experiments
+
+    def get_dataset_backend(
+        self, dataset_id: str, name: str, model: t.Type[BaseModel]
+    ) -> DatasetBackend:
+        """Get a DatasetBackend instance for a specific dataset."""
+        if self.project_id is None:
+            raise ValueError(
+                "Backend must be initialized before creating dataset backend"
+            )
+        return LocalCSVDatasetBackend(
+            local_root_dir=self.root_dir,
+            project_id=self.project_id,
+            dataset_id=dataset_id,
+            dataset_name=name,
+            datatable_type="datasets",
+        )
+
+    def get_experiment_backend(
+        self, experiment_id: str, name: str, model: t.Type[BaseModel]
+    ) -> DatasetBackend:
+        """Get a DatasetBackend instance for a specific experiment."""
+        if self.project_id is None:
+            raise ValueError(
+                "Backend must be initialized before creating experiment backend"
+            )
+        return LocalCSVDatasetBackend(
+            local_root_dir=self.root_dir,
+            project_id=self.project_id,
+            dataset_id=experiment_id,
+            dataset_name=name,
+            datatable_type="experiments",
+        )
+
+    def get_dataset_by_name(
+        self, name: str, model: t.Type[BaseModel]
+    ) -> t.Tuple[str, DatasetBackend]:
+        """Get dataset ID and backend by name."""
+        # Check if the dataset file exists
+        dataset_path = os.path.join(self._project_dir, "datasets", f"{name}.csv")
+        if not os.path.exists(dataset_path):
+            raise ValueError(f"Dataset '{name}' does not exist")
+
+        # Create dataset instance with a random ID
+        dataset_id = create_nano_id()
+        backend = self.get_dataset_backend(dataset_id, name, model)
+
+        return dataset_id, backend
+
+    def get_experiment_by_name(
+        self, name: str, model: t.Type[BaseModel]
+    ) -> t.Tuple[str, DatasetBackend]:
+        """Get experiment ID and backend by name."""
+        # Check if the experiment file exists
+        experiment_path = os.path.join(self._project_dir, "experiments", f"{name}.csv")
+        if not os.path.exists(experiment_path):
+            raise ValueError(f"Experiment '{name}' does not exist")
+
+        # Create experiment instance with a random ID
+        experiment_id = create_nano_id()
+        backend = self.get_experiment_backend(experiment_id, name, model)
+
+        return experiment_id, backend
diff --git a/experimental/ragas_experimental/project/backends/platform.py b/experimental/ragas_experimental/project/backends/platform.py
new file mode 100644
index 000000000..52b93dcbe
--- /dev/null
+++ b/experimental/ragas_experimental/project/backends/platform.py
@@ -0,0 +1,354 @@
+"""Platform (Ragas API) backend implementation for projects and datasets."""
+
+import asyncio
+import typing as t
+
+import ragas_experimental.typing as rt
+from ragas_experimental.model.pydantic_model import (
+    ExtendedPydanticBaseModel as BaseModel,
+)
+
+from ...backends.ragas_api_client import RagasApiClient
+from ...utils import async_to_sync
+from ..utils import create_nano_id
+from .base import DatasetBackend, ProjectBackend
+
+
+class PlatformDatasetBackend(DatasetBackend):
+    """Platform API implementation of DatasetBackend."""
+
+    def __init__(
+        self,
+        ragas_api_client: RagasApiClient,
+        project_id: str,
+        dataset_id: str,
+        datatable_type: t.Literal["datasets", "experiments"],
+    ):
+        self.ragas_api_client = ragas_api_client
+        self.project_id = project_id
+        self.dataset_id = dataset_id
+        self.datatable_type = datatable_type
+        self.dataset = None
+
+    def initialize(self, dataset):
+        """Initialize the backend with the dataset instance."""
+        self.dataset = dataset
+
+    def get_column_mapping(self, model):
+        """Get mapping between model fields and backend columns."""
+        if self.datatable_type == "datasets":
+            sync_func = async_to_sync(self.ragas_api_client.list_dataset_columns)
+            columns = sync_func(project_id=self.project_id, dataset_id=self.dataset_id)
+        else:  # experiments
+            sync_func = async_to_sync(self.ragas_api_client.list_experiment_columns)
+            columns = sync_func(
+                project_id=self.project_id, experiment_id=self.dataset_id
+            )
+
+        column_id_map = {column["name"]: column["id"] for column in columns["items"]}
+
+        # Update the model's column mapping with the values from the API
+        column_mapping = {}
+        for field_name in model.__annotations__:
+            if field_name in column_id_map:
+                column_mapping[field_name] = column_id_map[field_name]
+
+        return column_mapping
+
+    def load_entries(self, model_class) -> t.List[t.Any]:
+        """Load all entries from the API."""
+        # Get all rows
+        if self.datatable_type == "datasets":
+            sync_func = async_to_sync(self.ragas_api_client.list_dataset_rows)
+            response = sync_func(project_id=self.project_id, dataset_id=self.dataset_id)
+        else:  # experiments
+            sync_func = async_to_sync(self.ragas_api_client.list_experiment_rows)
+            response = sync_func(
+                project_id=self.project_id, experiment_id=self.dataset_id
+            )
+
+        # Get column mapping (ID -> name)
+        column_map = {v: k for k, v in model_class.__column_mapping__.items()}
+
+        # Process rows
+        entries = []
+        for row in response.get("items", []):
+            model_data = {}
+            row_id = row.get("id")
+
+            # Convert from API data format to model fields
+            for col_id, value in row.get("data", {}).items():
+                if col_id in column_map:
+                    field_name = column_map[col_id]
+                    model_data[field_name] = value
+
+            # Create model instance
+            entry = model_class(**model_data)
+
+            # Store row ID for future operations
+            entry._row_id = row_id
+
+            entries.append(entry)
+
+        return entries
+
+    def append_entry(self, entry) -> str:
+        """Add a new entry to the API and return its ID."""
+        # Get column mapping
+        column_id_map = entry.__class__.__column_mapping__
+
+        # Create row data
+        row_dict_converted = rt.ModelConverter.instance_to_row(entry)
+        row_id = create_nano_id()
+        row_data = {}
+
+        for column in row_dict_converted["data"]:
+            if column["column_id"] in column_id_map:
+                row_data[column_id_map[column["column_id"]]] = column["data"]
+
+        # Create row in API
+        if self.datatable_type == "datasets":
+            sync_func = async_to_sync(self.ragas_api_client.create_dataset_row)
+            response = sync_func(
+                project_id=self.project_id,
+                dataset_id=self.dataset_id,
+                id=row_id,
+                data=row_data,
+            )
+        else:  # experiments
+            sync_func = async_to_sync(self.ragas_api_client.create_experiment_row)
+            response = sync_func(
+                project_id=self.project_id,
+                experiment_id=self.dataset_id,
+                id=row_id,
+                data=row_data,
+            )
+
+        # Return the row ID
+        return response["id"]
+
+    def update_entry(self, entry) -> bool:
+        """Update an existing entry in the API."""
+        # Get the row ID
+        row_id = None
+        if hasattr(entry, "_row_id") and entry._row_id:
+            row_id = entry._row_id
+        else:
+            raise ValueError("Cannot update: entry has no row ID")
+
+        # Get column mapping and prepare data
+        column_id_map = entry.__class__.__column_mapping__
+        row_dict = rt.ModelConverter.instance_to_row(entry)["data"]
+        row_data = {}
+
+        for column in row_dict:
+            if column["column_id"] in column_id_map:
+                row_data[column_id_map[column["column_id"]]] = column["data"]
+
+        # Update in API
+        if self.datatable_type == "datasets":
+            sync_func = async_to_sync(self.ragas_api_client.update_dataset_row)
+            response = sync_func(
+                project_id=self.project_id,
+                dataset_id=self.dataset_id,
+                row_id=row_id,
+                data=row_data,
+            )
+        else:  # experiments
+            sync_func = async_to_sync(self.ragas_api_client.update_experiment_row)
+            response = sync_func(
+                project_id=self.project_id,
+                experiment_id=self.dataset_id,
+                row_id=row_id,
+                data=row_data,
+            )
+
+        return response
+
+    def delete_entry(self, entry_id) -> bool:
+        """Delete an entry from the API."""
+        # Delete the row
+        if self.datatable_type == "datasets":
+            sync_func = async_to_sync(self.ragas_api_client.delete_dataset_row)
+            response = sync_func(
+                project_id=self.project_id, dataset_id=self.dataset_id, row_id=entry_id
+            )
+        else:  # experiments
+            sync_func = async_to_sync(self.ragas_api_client.delete_experiment_row)
+            response = sync_func(
+                project_id=self.project_id,
+                experiment_id=self.dataset_id,
+                row_id=entry_id,
+            )
+
+        return response
+
+    def get_entry_by_field(
+        self, field_name, field_value, model_class
+    ) -> t.Optional[t.Any]:
+        """Get an entry by field value."""
+        # We don't have direct filtering in the API, so load all and filter
+        entries = self.load_entries(model_class)
+
+        # Search for matching entry
+        for entry in entries:
+            if hasattr(entry, field_name) and getattr(entry, field_name) == field_value:
+                return entry
+
+        return None
+
+
+async def create_dataset_columns(
+    project_id, dataset_id, columns, create_dataset_column_func
+):
+    """Helper function to create dataset columns."""
+    tasks = []
+    for column in columns:
+        tasks.append(
+            create_dataset_column_func(
+                project_id=project_id,
+                dataset_id=dataset_id,
+                id=create_nano_id(),
+                name=column["name"],
+                type=column["type"],
+                settings=column["settings"],
+            )
+        )
+    return await asyncio.gather(*tasks)
+
+
+async def create_experiment_columns(
+    project_id, experiment_id, columns, create_experiment_column_func
+):
+    """Helper function to create experiment columns."""
+    tasks = []
+    for column in columns:
+        tasks.append(
+            create_experiment_column_func(
+                project_id=project_id,
+                experiment_id=experiment_id,
+                id=create_nano_id(),
+                name=column["name"],
+                type=column["type"],
+                settings=column["settings"],
+            )
+        )
+    return await asyncio.gather(*tasks)
+
+
+class PlatformProjectBackend(ProjectBackend):
+    """Platform API implementation of ProjectBackend."""
+
+    def __init__(self, ragas_api_client: RagasApiClient):
+        self.ragas_api_client = ragas_api_client
+        self.project_id: t.Optional[str] = None
+
+    def initialize(self, project_id: str, **kwargs):
+        """Initialize the backend with project information."""
+        self.project_id = project_id
+
+    def create_dataset(self, name: str, model: t.Type[BaseModel]) -> str:
+        """Create a new dataset and return its ID."""
+        # Create the dataset
+        sync_version = async_to_sync(self.ragas_api_client.create_dataset)
+        dataset_info = sync_version(
+            project_id=self.project_id,
+            name=name,
+        )
+
+        # Create the columns for the dataset
+        column_types = rt.ModelConverter.model_to_columns(model)
+        sync_create_columns = async_to_sync(create_dataset_columns)
+        sync_create_columns(
+            project_id=self.project_id,
+            dataset_id=dataset_info["id"],
+            columns=column_types,
+            create_dataset_column_func=self.ragas_api_client.create_dataset_column,
+        )
+
+        return dataset_info["id"]
+
+    def create_experiment(self, name: str, model: t.Type[BaseModel]) -> str:
+        """Create a new experiment and return its ID."""
+        # Create the experiment in the API
+        sync_version = async_to_sync(self.ragas_api_client.create_experiment)
+        experiment_info = sync_version(
+            project_id=self.project_id,
+            name=name,
+        )
+
+        # Create the columns for the experiment
+        column_types = rt.ModelConverter.model_to_columns(model)
+        sync_version = async_to_sync(create_experiment_columns)
+        sync_version(
+            project_id=self.project_id,
+            experiment_id=experiment_info["id"],
+            columns=column_types,
+            create_experiment_column_func=self.ragas_api_client.create_experiment_column,
+        )
+
+        return experiment_info["id"]
+
+    def list_datasets(self) -> t.List[t.Dict]:
+        """List all datasets in the project."""
+        sync_version = async_to_sync(self.ragas_api_client.list_datasets)
+        datasets = sync_version(project_id=self.project_id)
+        return datasets.get("items", [])
+
+    def list_experiments(self) -> t.List[t.Dict]:
+        """List all experiments in the project."""
+        sync_version = async_to_sync(self.ragas_api_client.list_experiments)
+        experiments = sync_version(project_id=self.project_id)
+        return experiments.get("items", [])
+
+    def get_dataset_backend(
+        self, dataset_id: str, name: str, model: t.Type[BaseModel]
+    ) -> DatasetBackend:
+        """Get a DatasetBackend instance for a specific dataset."""
+        if self.project_id is None:
+            raise ValueError(
+                "Backend must be initialized before creating dataset backend"
+            )
+        return PlatformDatasetBackend(
+            ragas_api_client=self.ragas_api_client,
+            project_id=self.project_id,
+            dataset_id=dataset_id,
+            datatable_type="datasets",
+        )
+
+    def get_experiment_backend(
+        self, experiment_id: str, name: str, model: t.Type[BaseModel]
+    ) -> DatasetBackend:
+        """Get a DatasetBackend instance for a specific experiment."""
+        if self.project_id is None:
+            raise ValueError(
+                "Backend must be initialized before creating experiment backend"
+            )
+        return PlatformDatasetBackend(
+            ragas_api_client=self.ragas_api_client,
+            project_id=self.project_id,
+            dataset_id=experiment_id,
+            datatable_type="experiments",
+        )
+
+    def get_dataset_by_name(
+        self, name: str, model: t.Type[BaseModel]
+    ) -> t.Tuple[str, DatasetBackend]:
+        """Get dataset ID and backend by name."""
+        # Search for dataset with given name
+        sync_version = async_to_sync(self.ragas_api_client.get_dataset_by_name)
+        dataset_info = sync_version(project_id=self.project_id, dataset_name=name)
+
+        backend = self.get_dataset_backend(dataset_info["id"], name, model)
+        return dataset_info["id"], backend
+
+    def get_experiment_by_name(
+        self, name: str, model: t.Type[BaseModel]
+    ) -> t.Tuple[str, DatasetBackend]:
+        """Get experiment ID and backend by name."""
+        # Search for experiment with given name
+        sync_version = async_to_sync(self.ragas_api_client.get_experiment_by_name)
+        experiment_info = sync_version(project_id=self.project_id, experiment_name=name)
+
+        backend = self.get_experiment_backend(experiment_info["id"], name, model)
+        return experiment_info["id"], backend
diff --git a/experimental/ragas_experimental/project/backends/registry.py b/experimental/ragas_experimental/project/backends/registry.py
new file mode 100644
index 000000000..39af0fbff
--- /dev/null
+++ b/experimental/ragas_experimental/project/backends/registry.py
@@ -0,0 +1,333 @@
+"""Backend registry for managing and discovering project backends."""
+
+import logging
+import typing as t
+from importlib import metadata
+
+from .base import ProjectBackend
+
+logger = logging.getLogger(__name__)
+
+
+class BackendRegistry:
+    """Registry for managing project backends with plugin support."""
+
+    _instance = None
+    _backends: t.Dict[str, t.Type[ProjectBackend]] = {}
+    _aliases: t.Dict[str, str] = {}
+    _discovered = False
+
+    def __new__(cls):
+        """Singleton pattern to ensure single registry instance."""
+        if cls._instance is None:
+            cls._instance = super().__new__(cls)
+        return cls._instance
+
+    @classmethod
+    def instance(cls) -> "BackendRegistry":
+        """Get the singleton registry instance."""
+        if cls._instance is None:
+            cls._instance = cls()
+        return cls._instance
+
+    def register_backend(
+        self,
+        name: str,
+        backend_class: t.Type[ProjectBackend],
+        aliases: t.Optional[t.List[str]] = None,
+        overwrite: bool = False,
+    ) -> None:
+        """Register a backend class with the registry.
+
+        Args:
+            name: Primary name for the backend
+            backend_class: The backend class to register
+            aliases: Optional list of alternative names for the backend
+            overwrite: Whether to overwrite existing backends with the same name
+
+        Raises:
+            TypeError: If backend_class doesn't inherit from ProjectBackend
+            ValueError: If backend name already exists and overwrite=False
+        """
+        if not name or not isinstance(name, str):
+            raise ValueError("Backend name must be a non-empty string")
+
+        if not issubclass(backend_class, ProjectBackend):
+            raise TypeError(
+                f"Backend class {backend_class} must inherit from ProjectBackend"
+            )
+
+        # Check for existing registration
+        if name in self._backends and not overwrite:
+            raise ValueError(
+                f"Backend '{name}' is already registered. Use overwrite=True to replace."
+            )
+
+        self._backends[name] = backend_class
+        logger.debug(f"Registered backend: {name} -> {backend_class}")
+
+        # Register aliases
+        if aliases:
+            for alias in aliases:
+                if not alias or not isinstance(alias, str):
+                    logger.warning(
+                        f"Invalid alias '{alias}' for backend '{name}', skipping"
+                    )
+                    continue
+
+                if alias in self._aliases and not overwrite:
+                    logger.warning(f"Alias '{alias}' already exists, skipping")
+                    continue
+
+                self._aliases[alias] = name
+                logger.debug(f"Registered backend alias: {alias} -> {name}")
+
+    def get_backend(self, name: str) -> t.Type[ProjectBackend]:
+        """Get a backend class by name.
+
+        Args:
+            name: Name or alias of the backend
+
+        Returns:
+            The backend class
+
+        Raises:
+            ValueError: If backend is not found
+        """
+        # Ensure backends are discovered
+        if not self._discovered:
+            self.discover_backends()
+
+        # Check if it's an alias first
+        if name in self._aliases:
+            name = self._aliases[name]
+
+        if name not in self._backends:
+            available = list(self._backends.keys()) + list(self._aliases.keys())
+            raise ValueError(
+                f"Backend '{name}' not found. Available backends: {available}"
+            )
+
+        return self._backends[name]
+
+    def list_available_backends(self) -> t.List[str]:
+        """List all available backend names.
+
+        Returns:
+            List of backend names (primary names only, not aliases)
+        """
+        if not self._discovered:
+            self.discover_backends()
+
+        return list(self._backends.keys())
+
+    def list_all_names(self) -> t.Dict[str, t.List[str]]:
+        """List all backend names including aliases.
+
+        Returns:
+            Dictionary mapping primary names to lists of all names (including aliases)
+        """
+        if not self._discovered:
+            self.discover_backends()
+
+        result = {}
+        for primary_name in self._backends.keys():
+            aliases = [
+                alias
+                for alias, target in self._aliases.items()
+                if target == primary_name
+            ]
+            result[primary_name] = [primary_name] + aliases
+
+        return result
+
+    def discover_backends(self) -> t.Dict[str, t.Type[ProjectBackend]]:
+        """Discover and register backends from entry points and manual registration.
+
+        Returns:
+            Dictionary of discovered backends
+        """
+        if self._discovered:
+            return self._backends.copy()
+
+        logger.debug("Discovering backends...")
+
+        # First register built-in backends manually (for now)
+        self._register_builtin_backends()
+
+        # Then discover from entry points
+        self._discover_from_entry_points()
+
+        self._discovered = True
+        logger.info(
+            f"Backend discovery complete. Found {len(self._backends)} backends."
+        )
+
+        return self._backends.copy()
+
+    def _register_builtin_backends(self) -> None:
+        """Register the built-in backends."""
+        try:
+            from .local_csv import LocalCSVProjectBackend
+
+            self.register_backend("local/csv", LocalCSVProjectBackend)
+
+            from .platform import PlatformProjectBackend
+
+            self.register_backend("ragas/app", PlatformProjectBackend)
+
+        except ImportError as e:
+            logger.warning(f"Failed to import built-in backend: {e}")
+
+    def _discover_from_entry_points(self) -> None:
+        """Discover backends from setuptools entry points."""
+        try:
+            # Look for entry points in the 'ragas.backends' group
+            entry_points = metadata.entry_points().select(group="ragas.backends")
+
+            for entry_point in entry_points:
+                try:
+                    backend_class = entry_point.load()
+                    self.register_backend(entry_point.name, backend_class)
+                    logger.info(
+                        f"Discovered backend from entry point: {entry_point.name}"
+                    )
+
+                except Exception as e:
+                    logger.warning(f"Failed to load backend '{entry_point.name}': {e}")
+
+        except Exception as e:
+            logger.debug(
+                f"Entry point discovery failed (this is normal if no plugins installed): {e}"
+            )
+
+    def get_backend_info(self, name: str) -> t.Dict[str, t.Any]:
+        """Get detailed information about a backend.
+
+        Args:
+            name: Name or alias of the backend
+
+        Returns:
+            Dictionary with backend information
+        """
+        backend_class = self.get_backend(name)
+
+        # Resolve to primary name if it's an alias
+        primary_name = name
+        if name in self._aliases:
+            primary_name = self._aliases[name]
+
+        # Get all aliases for this backend
+        aliases = [
+            alias for alias, target in self._aliases.items() if target == primary_name
+        ]
+
+        return {
+            "name": primary_name,
+            "class": backend_class,
+            "module": backend_class.__module__,
+            "aliases": aliases,
+            "doc": backend_class.__doc__ or "No documentation available",
+        }
+
+    def list_backend_info(self) -> t.List[t.Dict[str, t.Any]]:
+        """List detailed information about all backends.
+
+        Returns:
+            List of dictionaries with backend information
+        """
+        if not self._discovered:
+            self.discover_backends()
+
+        return [self.get_backend_info(name) for name in self._backends.keys()]
+
+    def clear(self) -> None:
+        """Clear all registered backends. Mainly for testing."""
+        self._backends.clear()
+        self._aliases.clear()
+        self._discovered = False
+
+    def create_backend(self, backend_type: str, **kwargs) -> ProjectBackend:
+        """Create a backend instance.
+
+        Args:
+            backend_type: The type of backend to create
+            **kwargs: Arguments specific to the backend
+
+        Returns:
+            ProjectBackend: An instance of the requested backend
+        """
+        backend_class = self.get_backend(backend_type)
+        return backend_class(**kwargs)
+
+
+# Global registry instance
+_registry = BackendRegistry.instance()
+
+
+def get_registry() -> BackendRegistry:
+    """Get the global backend registry instance."""
+    return _registry
+
+
+def register_backend(
+    name: str,
+    backend_class: t.Type[ProjectBackend],
+    aliases: t.Optional[t.List[str]] = None,
+) -> None:
+    """Register a backend with the global registry.
+
+    Args:
+        name: Primary name for the backend
+        backend_class: The backend class to register
+        aliases: Optional list of alternative names for the backend
+    """
+    _registry.register_backend(name, backend_class, aliases)
+
+
+def list_backends() -> t.List[str]:
+    """List all available backend names."""
+    return _registry.list_available_backends()
+
+
+def get_backend_info(name: str) -> t.Dict[str, t.Any]:
+    """Get detailed information about a specific backend."""
+    return _registry.get_backend_info(name)
+
+
+def list_backend_info() -> t.List[t.Dict[str, t.Any]]:
+    """List detailed information about all available backends."""
+    return _registry.list_backend_info()
+
+
+def print_available_backends() -> None:
+    """Print a formatted list of available backends."""
+    backends = _registry.list_backend_info()
+
+    if not backends:
+        print("No backends available.")
+        return
+
+    print("Available backends:")
+    print("-" * 50)
+
+    for backend in backends:
+        print(f"Name: {backend['name']}")
+        if backend["aliases"]:
+            print(f"Aliases: {', '.join(backend['aliases'])}")
+        print(f"Module: {backend['module']}")
+        print(f"Description: {backend['doc']}")
+        print("-" * 50)
+
+
+def create_project_backend(backend_type: str, **kwargs) -> ProjectBackend:
+    """Create a project backend instance.
+
+    Args:
+        backend_type: The type of backend to create
+        **kwargs: Arguments specific to the backend
+
+    Returns:
+        ProjectBackend: An instance of the requested backend
+    """
+    return _registry.create_backend(backend_type, **kwargs)
diff --git a/experimental/ragas_experimental/project/comparison.py b/experimental/ragas_experimental/project/comparison.py
deleted file mode 100644
index 0621b787a..000000000
--- a/experimental/ragas_experimental/project/comparison.py
+++ /dev/null
@@ -1,160 +0,0 @@
-"""Create Comparison views with different experiments"""
-
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/project/comparison.ipynb.
-
-# %% auto 0
-__all__ = ['logger']
-
-# %% ../../nbs/project/comparison.ipynb 3
-import typing as t
-import logging
-
-from fastcore.utils import patch
-from tqdm import tqdm
-
-from .core import Project
-from ..model.notion_model import NotionModel
-import ragas_experimental.model.notion_typing as nmt
-from ..experiment import Experiment
-from ..dataset import Dataset
-
-# %% ../../nbs/project/comparison.ipynb 4
-logger = logging.getLogger(__name__)
-
-# %% ../../nbs/project/comparison.ipynb 5
-# utility function to check if a model has a title property and get the name of the title property
-@t.overload
-def _get_title_property(
-    model: NotionModel | t.Type[NotionModel], raise_exception: t.Literal[True] = True
-) -> str: ...
-@t.overload
-def _get_title_property(
-    model: NotionModel | t.Type[NotionModel], raise_exception: t.Literal[False] = False
-) -> t.Optional[str]: ...
-def _get_title_property(
-    model: NotionModel | t.Type[NotionModel], raise_exception: bool = True
-) -> t.Optional[str]:
-    has_title = False
-    for field in model._fields.keys():
-        if isinstance(model._fields[field], nmt.Title):
-            has_title = True
-            title_property = field
-            return title_property
-
-    if not has_title:
-        if raise_exception:
-            raise ValueError("Model has no title property")
-        else:
-            return None
-
-# %% ../../nbs/project/comparison.ipynb 8
-def _validate_experiments(experiments: t.Sequence[Experiment]):
-    # validate we have more than 2 experiments
-    if len(experiments) < 2:
-        raise ValueError("We need at least 2 experiments to compare")
-
-    # validate that all experiments are of the same model
-    top_exp = experiments[0]
-    title_property = _get_title_property(top_exp.model)
-    for exp in experiments:
-        if not isinstance(exp, Experiment):
-            raise ValueError("All experiments must be of type Experiment")
-        if top_exp != exp.model:
-            logger.warning(
-                f"Experiments have different models: {top_exp.model} and {exp.model}"
-            )
-        if title_property != _get_title_property(exp.model):
-            raise ValueError("All experiments must have the same title property.")
-
-# %% ../../nbs/project/comparison.ipynb 12
-def _model_to_dict(model: NotionModel) -> dict:
-    # drop ID filed
-    data = {}
-    for field_name in model._fields.keys():
-        if isinstance(model._fields[field_name], nmt.ID):
-            continue
-        data[field_name] = model.__getattribute__(field_name)
-    return data
-
-# %% ../../nbs/project/comparison.ipynb 14
-def _combine_experiments(experiments: t.Sequence[Experiment]):
-    """Group experiment rows by their title property value."""
-    if not experiments:
-        return []
-
-    title_property: str = _get_title_property(experiments[0].model)
-
-    # Create a dictionary to group rows by title value
-    grouped_by_title = {}
-
-    # Process each experiment
-    for exp in experiments:
-        for row in exp:
-            title_value = getattr(row, title_property)
-
-            # Create key if it doesn't exist
-            if title_value not in grouped_by_title:
-                grouped_by_title[title_value] = []
-
-            # Add this row to the appropriate group
-            row_dict = _model_to_dict(row)
-            row_dict["experiment_name"] = exp.name
-            grouped_by_title[title_value].append(row_dict)
-
-    # Convert dictionary to list and add id_str
-    result = []
-    for i, (_, rows) in enumerate(grouped_by_title.items()):
-        for row in rows:
-            row["id_str"] = str(i)
-        result.append(rows)
-
-    return result
-
-# %% ../../nbs/project/comparison.ipynb 17
-@patch
-def compare_experiments(
-    self: Project,
-    *experiments: Experiment,
-):
-    _validate_experiments(experiments)
-
-    # create a combined Model with all the fields of the experiments
-    class CombinedModel(NotionModel):
-        id_str: str = nmt.Text()
-        experiment_name: str = nmt.Text()
-
-    for exp in experiments:
-        for field in exp.model._fields.keys():
-            if field not in CombinedModel._fields:
-                CombinedModel._fields[field] = exp.model._fields[field]
-
-    # create a new database with the combined model
-    properties = {}
-    for field in CombinedModel._fields.keys():
-        properties.update(CombinedModel._fields[field]._to_notion_property())
-    comparison_database_id = self._notion_backend.create_new_database(
-        parent_page_id=self.comparisons_page_id,
-        title=f"{' and '.join([exp.name for exp in experiments])}",
-        properties=properties,
-    )
-
-    # make sure all experiments are synced to upstream
-    for exp in experiments:
-        exp.load()
-
-    # group together by title property
-    grouped_experiments = _combine_experiments(experiments)
-
-    # append these to database
-    for grouped_row in tqdm(grouped_experiments, desc="Uploading to Notion"):
-        for row in grouped_row:
-            combined_model_instance = CombinedModel(**row)
-            self._notion_backend.create_page_in_database(
-                database_id=comparison_database_id,
-                properties=combined_model_instance.to_notion()["properties"],
-            )
-    # Get the URL for the created database
-    # The format for Notion URLs is: https://www.notion.so/{database_id}
-    notion_url = f"https://www.notion.so/{comparison_database_id.replace('-', '')}"
-
-    return notion_url
diff --git a/experimental/ragas_experimental/project/core.py b/experimental/ragas_experimental/project/core.py
index b709e1171..e2c0ae114 100644
--- a/experimental/ragas_experimental/project/core.py
+++ b/experimental/ragas_experimental/project/core.py
@@ -1,183 +1,372 @@
 """Use this class to represent the AI project that we are working on and to interact with datasets and experiments in it."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/project/core.ipynb.
+__all__ = ["Project"]
 
-# %% auto 0
-__all__ = ['Project']
-
-# %% ../../nbs/api/project/core.ipynb 4
-import typing as t
 import os
-import asyncio
+import shutil
+import typing as t
 
-from fastcore.utils import patch
-from pydantic import BaseModel
+import ragas_experimental.typing as rt
+from ragas_experimental.model.pydantic_model import (
+    ExtendedPydanticBaseModel as BaseModel,
+)
 
 from ..backends.factory import RagasApiClientFactory
 from ..backends.ragas_api_client import RagasApiClient
-import ragas_experimental.typing as rt
-from ..utils import async_to_sync, create_nano_id
 from ..dataset import Dataset
 from ..experiment import Experiment
+from ..utils import async_to_sync
+from .backends import ProjectBackend
+from .backends.local_csv import LocalCSVProjectBackend
+from .backends.platform import PlatformProjectBackend
+from .decorators import add_experiment_decorators
+
 
-# %% ../../nbs/api/project/core.ipynb 5
 class Project:
+    """Represents an AI project for managing datasets and experiments."""
+
     def __init__(
         self,
         project_id: str,
-        backend: rt.SUPPORTED_BACKENDS = "local",
-        root_dir: t.Optional[str] = None,
-        ragas_api_client: t.Optional[RagasApiClient] = None,
+        project_backend: ProjectBackend,
+        name: t.Optional[str] = None,
+        description: t.Optional[str] = None,
     ):
+        """Initialize a Project with a backend.
+
+        Args:
+            project_id: Unique identifier for the project
+            project_backend: Backend instance for project operations
+            name: Human-readable name for the project
+            description: Optional description of the project
+        """
         self.project_id = project_id
-        self.backend = backend
+        self._backend = project_backend
+        self.name = name or project_id
+        self.description = description or ""
+
+        # Initialize the backend with project information
+        self._backend.initialize(project_id)
 
-        if backend == "local":
+        # Add experiment decorator methods
+        add_experiment_decorators(self)
+
+    @classmethod
+    def create(
+        cls,
+        name: str,
+        description: str = "",
+        backend: rt.SUPPORTED_BACKENDS = "local/csv",
+        root_dir: t.Optional[str] = None,
+        ragas_api_client: t.Optional[RagasApiClient] = None,
+    ) -> "Project":
+        """Create a new project.
+
+        Args:
+            name: Name of the project
+            description: Description of the project
+            backend: Backend type ("local/csv" or "ragas/app")
+            root_dir: Root directory for local backends
+            ragas_api_client: API client for ragas/app backend
+
+        Returns:
+            Project: A new project instance
+        """
+        if backend == "ragas/app":
+            ragas_api_client = ragas_api_client or RagasApiClientFactory.create()
+            sync_version = async_to_sync(ragas_api_client.create_project)
+            new_project = sync_version(title=name, description=description)
+
+            project_backend = PlatformProjectBackend(ragas_api_client)
+            return cls(
+                project_id=new_project["id"],
+                project_backend=project_backend,
+                name=new_project["title"],
+                description=new_project["description"],
+            )
+        elif backend == "local/csv":
             if root_dir is None:
-                raise ValueError("root_dir is required for local backend")
-            self._root_dir = os.path.join(root_dir, project_id)
-            # Ensure project directory structure exists
-            self._create_local_project_structure()
-        elif backend == "ragas_app":
+                raise ValueError("root_dir is required for local/csv backend")
+
+            project_backend = LocalCSVProjectBackend(root_dir)
+            return cls(
+                project_id=name,  # Use name as project_id for local
+                project_backend=project_backend,
+                name=name,
+                description=description,
+            )
+        else:
+            raise ValueError(f"Unsupported backend: {backend}")
+
+    @classmethod
+    def get(
+        cls,
+        name: str,
+        backend: rt.SUPPORTED_BACKENDS = "local/csv",
+        root_dir: t.Optional[str] = None,
+        ragas_api_client: t.Optional[RagasApiClient] = None,
+    ) -> "Project":
+        """Get an existing project by name.
+
+        Args:
+            name: The name of the project to get
+            backend: The backend to use ("local/csv" or "ragas/app")
+            root_dir: The root directory for local backends
+            ragas_api_client: Optional custom Ragas API client
+
+        Returns:
+            Project: The project instance
+        """
+        if backend == "ragas/app":
             if ragas_api_client is None:
-                self._ragas_api_client = RagasApiClientFactory.create()
-            else:
-                self._ragas_api_client = ragas_api_client
+                ragas_api_client = RagasApiClientFactory.create()
+
+            # Get the project by name
+            sync_version = async_to_sync(ragas_api_client.get_project_by_name)
+            project_info = sync_version(project_name=name)
+
+            project_backend = PlatformProjectBackend(ragas_api_client)
+            return cls(
+                project_id=project_info["id"],
+                project_backend=project_backend,
+                name=project_info["title"],
+                description=project_info["description"],
+            )
+        elif backend == "local/csv":
+            if root_dir is None:
+                raise ValueError("root_dir is required for local/csv backend")
+
+            # For local backend, check if project directory exists
+            project_path = os.path.join(root_dir, name)
+            if not os.path.exists(project_path):
+                raise ValueError(
+                    f"Local project '{name}' does not exist at {project_path}"
+                )
+
+            project_backend = LocalCSVProjectBackend(root_dir)
+            return cls(
+                project_id=name,
+                project_backend=project_backend,
+                name=name,
+                description="",
+            )
         else:
-            raise ValueError(f"Invalid backend: {backend}")
-
-        # Initialize project properties
-        if backend == "ragas_app":
-            try:
-                sync_version = async_to_sync(self._ragas_api_client.get_project)
-                existing_project = sync_version(project_id=self.project_id)
-                self.project_id = existing_project["id"]
-                self.name = existing_project["title"]
-                self.description = existing_project["description"]
-            except Exception as e:
-                raise e
-        elif backend == "local":
-            self.name = self.project_id
-            self.description = ""
-
-    def _create_local_project_structure(self):
-        """Create the local directory structure for the project"""
-        os.makedirs(self._root_dir, exist_ok=True)
-        # Create datasets directory
-        os.makedirs(os.path.join(self._root_dir, "datasets"), exist_ok=True)
-        # Create experiments directory
-        os.makedirs(os.path.join(self._root_dir, "experiments"), exist_ok=True)
-
-# %% ../../nbs/api/project/core.ipynb 6
-@patch(cls_method=True)
-def create(
-    cls: Project,
-    name: str,
-    description: str = "",
-    backend: rt.SUPPORTED_BACKENDS = "local",
-    root_dir: t.Optional[str] = None,
-    ragas_api_client: t.Optional[RagasApiClient] = None,
-):
-    if backend == "ragas_app":
-        ragas_api_client = ragas_api_client or RagasApiClientFactory.create()
-        sync_version = async_to_sync(ragas_api_client.create_project)
-        new_project = sync_version(title=name, description=description)
-        return cls(
-            new_project["id"], backend="ragas_api", ragas_api_client=ragas_api_client
+            raise ValueError(f"Unsupported backend: {backend}")
+
+    def delete(self):
+        """Delete the project and all its data."""
+        if isinstance(self._backend, PlatformProjectBackend):
+            sync_version = async_to_sync(self._backend.ragas_api_client.delete_project)
+            sync_version(project_id=self.project_id)
+            print("Project deleted from Ragas platform!")
+        elif isinstance(self._backend, LocalCSVProjectBackend):
+            # Caution: this deletes the entire project directory
+            project_dir = os.path.join(self._backend.root_dir, self.project_id)
+            if os.path.exists(project_dir):
+                shutil.rmtree(project_dir)
+                print(f"Local project at {project_dir} deleted!")
+            else:
+                print(f"Local project at {project_dir} does not exist")
+
+    # Dataset operations
+    def create_dataset(
+        self,
+        model: t.Type[BaseModel],
+        name: t.Optional[str] = None,
+    ) -> Dataset:
+        """Create a new dataset.
+
+        Args:
+            model: Model class defining the dataset structure
+            name: Name of the dataset (defaults to model name if not provided)
+
+        Returns:
+            Dataset: A new dataset object for managing entries
+        """
+        if name is None:
+            name = model.__name__
+
+        dataset_id = self._backend.create_dataset(name, model)
+
+        backend_name = (
+            "ragas/app"
+            if isinstance(self._backend, PlatformProjectBackend)
+            else "local/csv"
         )
-    elif backend == "local":
-        if root_dir is None:
-            raise ValueError("root_dir is required for local backend")
-        # For local backend, we use the name as the project_id
-        project_id = name
-        return cls(project_id, backend="local", root_dir=root_dir)
-
-# %% ../../nbs/api/project/core.ipynb 9
-@patch
-def delete(self: Project):
-    if self.backend == "ragas_app":
-        sync_version = async_to_sync(self._ragas_api_client.delete_project)
-        sync_version(project_id=self.project_id)
-        print("Project deleted from Ragas API!")
-    elif self.backend == "local":
-        import shutil
-
-        # Caution: this deletes the entire project directory
-        if os.path.exists(self._root_dir):
-            shutil.rmtree(self._root_dir)
-            print(f"Local project at {self._root_dir} deleted!")
-        else:
-            print(f"Local project at {self._root_dir} does not exist")
-
-    @patch
-    def __repr__(self: Project):
-        return f"Project(name='{self.name}', backend='{self.backend}')"
-
-# %% ../../nbs/api/project/core.ipynb 11
-@patch(cls_method=True)
-def get(
-    cls: Project,
-    name: str,
-    backend: rt.SUPPORTED_BACKENDS = "local",
-    root_dir: t.Optional[str] = None,
-    ragas_api_client: t.Optional[RagasApiClient] = None,
-) -> Project:
-    """Get an existing project by name.
-
-    Args:
-        name: The name of the project to get
-        backend: The backend to use (ragas_api or local)
-        root_dir: The root directory for local backends
-        ragas_api_client: Optional custom Ragas API client
-
-    Returns:
-        Project: The project instance
-    """
-    if backend == "ragas_app":
-        # Search for project with given name in Ragas API
-        if ragas_api_client is None:
-            ragas_api_client = RagasApiClientFactory.create()
-
-        # get the project by name
-        sync_version = async_to_sync(ragas_api_client.get_project_by_name)
-        project_info = sync_version(project_name=name)
-
-        # Return Project instance
-        return Project(
-            project_id=project_info["id"],
-            backend="ragas_app",
-            ragas_api_client=ragas_api_client,
+
+        return Dataset(
+            name=name,
+            model=model,
+            project_id=self.project_id,
+            dataset_id=dataset_id,
+            datatable_type="datasets",
+            ragas_api_client=getattr(self._backend, "ragas_api_client", None),
+            backend=backend_name,
+            local_root_dir=getattr(self._backend, "root_dir", None),
         )
-    elif backend == "local":
-        if root_dir is None:
-            raise ValueError("root_dir is required for local backend")
-
-        # For local backend, check if project directory exists
-        project_path = os.path.join(root_dir, name)
-        if not os.path.exists(project_path):
-            raise ValueError(f"Local project '{name}' does not exist at {project_path}")
-
-        # Return Project instance
-        return Project(
-            project_id=name,
-            backend="local",
-            root_dir=root_dir,
+
+    def get_dataset(
+        self,
+        dataset_name: str,
+        model: t.Type[BaseModel],
+    ) -> Dataset:
+        """Get an existing dataset by name.
+
+        Args:
+            dataset_name: The name of the dataset to retrieve
+            model: The model class to use for the dataset entries
+
+        Returns:
+            Dataset: The retrieved dataset
+        """
+        dataset_id, dataset_backend = self._backend.get_dataset_by_name(
+            dataset_name, model
+        )
+
+        backend_name = (
+            "ragas/app"
+            if isinstance(self._backend, PlatformProjectBackend)
+            else "local/csv"
+        )
+
+        return Dataset(
+            name=dataset_name,
+            model=model,
+            project_id=self.project_id,
+            dataset_id=dataset_id,
+            datatable_type="datasets",
+            ragas_api_client=getattr(self._backend, "ragas_api_client", None),
+            backend=backend_name,
+            local_root_dir=getattr(self._backend, "root_dir", None),
+        )
+
+    def list_datasets(self) -> t.List[str]:
+        """List all datasets in the project.
+
+        Returns:
+            List[str]: Names of all datasets in the project
+        """
+        datasets = self._backend.list_datasets()
+        return [dataset["name"] for dataset in datasets]
+
+    # Experiment operations
+    def create_experiment(
+        self,
+        name: str,
+        model: t.Type[BaseModel],
+    ) -> Experiment:
+        """Create a new experiment.
+
+        Args:
+            name: Name of the experiment
+            model: Model class defining the experiment structure
+
+        Returns:
+            Experiment: An experiment object for managing results
+        """
+        experiment_id = self._backend.create_experiment(name, model)
+
+        backend_name = (
+            "ragas/app"
+            if isinstance(self._backend, PlatformProjectBackend)
+            else "local/csv"
+        )
+
+        return Experiment(
+            name=name,
+            model=model,
+            project_id=self.project_id,
+            experiment_id=experiment_id,
+            ragas_api_client=getattr(self._backend, "ragas_api_client", None),
+            backend=backend_name,
+            local_root_dir=getattr(self._backend, "root_dir", None),
+        )
+
+    def get_experiment(
+        self,
+        experiment_name: str,
+        model: t.Type[BaseModel],
+    ) -> Experiment:
+        """Get an existing experiment by name.
+
+        Args:
+            experiment_name: The name of the experiment to retrieve
+            model: The model class to use for the experiment results
+
+        Returns:
+            Experiment: The retrieved experiment
+        """
+        experiment_id, experiment_backend = self._backend.get_experiment_by_name(
+            experiment_name, model
+        )
+
+        backend_name = (
+            "ragas/app"
+            if isinstance(self._backend, PlatformProjectBackend)
+            else "local/csv"
+        )
+
+        return Experiment(
+            name=experiment_name,
+            model=model,
+            project_id=self.project_id,
+            experiment_id=experiment_id,
+            ragas_api_client=getattr(self._backend, "ragas_api_client", None),
+            backend=backend_name,
+            local_root_dir=getattr(self._backend, "root_dir", None),
+        )
+
+    def list_experiments(self) -> t.List[str]:
+        """List all experiments in the project.
+
+        Returns:
+            List[str]: Names of all experiments in the project
+        """
+        experiments = self._backend.list_experiments()
+        return [experiment["name"] for experiment in experiments]
+
+    # Utility methods for local backend compatibility
+    def get_dataset_path(self, dataset_name: str) -> str:
+        """Get the path to a dataset file in the local backend.
+
+        Args:
+            dataset_name: Name of the dataset
+
+        Returns:
+            str: Path to the dataset CSV file
+
+        Raises:
+            ValueError: If not using local backend
+        """
+        if not isinstance(self._backend, LocalCSVProjectBackend):
+            raise ValueError("This method is only available for local/csv backend")
+        return os.path.join(
+            self._backend._project_dir, "datasets", f"{dataset_name}.csv"
+        )
+
+    def get_experiment_path(self, experiment_name: str) -> str:
+        """Get the path to an experiment file in the local backend.
+
+        Args:
+            experiment_name: Name of the experiment
+
+        Returns:
+            str: Path to the experiment CSV file
+
+        Raises:
+            ValueError: If not using local backend
+        """
+        if not isinstance(self._backend, LocalCSVProjectBackend):
+            raise ValueError("This method is only available for local/csv backend")
+        return os.path.join(
+            self._backend._project_dir, "experiments", f"{experiment_name}.csv"
+        )
+
+    def __repr__(self) -> str:
+        """String representation of the project."""
+        backend_name = (
+            "ragas/app"
+            if isinstance(self._backend, PlatformProjectBackend)
+            else "local/csv"
         )
-    else:
-        raise ValueError(f"Invalid backend: {backend}")
-
-# %% ../../nbs/api/project/core.ipynb 13
-@patch
-def get_dataset_path(self: Project, dataset_name: str) -> str:
-    """Get the path to a dataset file in the local backend"""
-    if self.backend != "local":
-        raise ValueError("This method is only available for local backend")
-    return os.path.join(self._root_dir, "datasets", f"{dataset_name}.csv")
-
-
-@patch
-def get_experiment_path(self: Project, experiment_name: str) -> str:
-    """Get the path to an experiment file in the local backend"""
-    if self.backend != "local":
-        raise ValueError("This method is only available for local backend")
-    return os.path.join(self._root_dir, "experiments", f"{experiment_name}.csv")
+        return f"Project(name='{self.name}', backend='{backend_name}')"
diff --git a/experimental/ragas_experimental/project/datasets.py b/experimental/ragas_experimental/project/datasets.py
deleted file mode 100644
index 5f77c0cd2..000000000
--- a/experimental/ragas_experimental/project/datasets.py
+++ /dev/null
@@ -1,280 +0,0 @@
-"""Methods to create and manage datasets within projects"""
-
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/project/datasets.ipynb.
-
-# %% auto 0
-__all__ = ['create_dataset_columns', 'get_dataset_from_ragas_app', 'get_dataset_from_local']
-
-# %% ../../nbs/api/project/datasets.ipynb 3
-import typing as t
-import os
-import asyncio
-import tempfile
-
-from fastcore.utils import patch
-from pydantic import BaseModel
-
-from .core import Project
-from ..typing import SUPPORTED_BACKENDS
-from ..backends.factory import RagasApiClientFactory
-from ..backends.ragas_api_client import RagasApiClient
-import ragas_experimental.typing as rt
-from ..utils import async_to_sync, create_nano_id
-from ..dataset import Dataset
-from ..utils import get_test_directory
-
-# %% ../../nbs/api/project/datasets.ipynb 4
-async def create_dataset_columns(
-    project_id, dataset_id, columns, create_dataset_column_func
-):
-    tasks = []
-    for column in columns:
-        tasks.append(
-            create_dataset_column_func(
-                project_id=project_id,
-                dataset_id=dataset_id,
-                id=create_nano_id(),
-                name=column["name"],
-                type=column["type"],
-                settings=column["settings"],
-            )
-        )
-    return await asyncio.gather(*tasks)
-
-# %% ../../nbs/api/project/datasets.ipynb 5
-def get_dataset_from_ragas_app(
-    self: Project, name: str, model: t.Type[BaseModel]
-) -> Dataset:
-    """Create a dataset in the Ragas App backend."""
-    # create the dataset
-    sync_version = async_to_sync(self._ragas_api_client.create_dataset)
-    dataset_info = sync_version(
-        project_id=self.project_id,
-        name=name if name is not None else model.__name__,
-    )
-
-    # create the columns for the dataset
-    column_types = rt.ModelConverter.model_to_columns(model)
-    sync_version = async_to_sync(create_dataset_columns)
-    sync_version(
-        project_id=self.project_id,
-        dataset_id=dataset_info["id"],
-        columns=column_types,
-        create_dataset_column_func=self._ragas_api_client.create_dataset_column,
-    )
-
-    # Return a new Dataset instance
-    return Dataset(
-        name=name if name is not None else model.__name__,
-        model=model,
-        datatable_type="datasets",
-        project_id=self.project_id,
-        dataset_id=dataset_info["id"],
-        ragas_api_client=self._ragas_api_client,
-        backend="ragas_app",
-    )
-
-# %% ../../nbs/api/project/datasets.ipynb 6
-def get_dataset_from_local(
-    self: Project, name: str, model: t.Type[BaseModel]
-) -> Dataset:
-    """Create a dataset in the local filesystem backend.
-
-    Args:
-        name: Name of the dataset
-        model: Pydantic model defining the structure
-
-    Returns:
-        Dataset: A new dataset configured to use the local backend
-    """
-    # Use a UUID as the dataset ID
-    dataset_id = create_nano_id()
-
-    # Return a new Dataset instance with local backend
-    return Dataset(
-        name=name if name is not None else model.__name__,
-        model=model,
-        datatable_type="datasets",
-        project_id=self.project_id,
-        dataset_id=dataset_id,
-        backend="local",
-        local_root_dir=os.path.dirname(self._root_dir),  # Root dir for all projects
-    )
-
-# %% ../../nbs/api/project/datasets.ipynb 7
-@patch
-def create_dataset(
-    self: Project,
-    model: t.Type[BaseModel],
-    name: t.Optional[str] = None,
-    backend: t.Optional[SUPPORTED_BACKENDS] = None,
-) -> Dataset:
-    """Create a new dataset.
-
-    Args:
-        model: Model class defining the dataset structure
-        name: Name of the dataset (defaults to model name if not provided)
-        backend: The backend to use (defaults to project's backend if not specified)
-
-    Returns:
-        Dataset: A new dataset object for managing entries
-    """
-    # If name is not provided, use the model name
-    if name is None:
-        name = model.__name__
-
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    # Create dataset using the appropriate backend
-    if backend == "local":
-        return get_dataset_from_local(self, name, model)
-    elif backend == "ragas_app":
-        return get_dataset_from_ragas_app(self, name, model)
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-
-# %% ../../nbs/api/project/datasets.ipynb 16
-@patch
-def get_dataset_by_id(
-    self: Project,
-    dataset_id: str,
-    model: t.Type[BaseModel],
-    backend: t.Optional[SUPPORTED_BACKENDS] = None,
-) -> Dataset:
-    """Get an existing dataset by ID.
-
-    Args:
-        dataset_id: The ID of the dataset to retrieve
-        model: The model class to use for the dataset entries
-        backend: The backend to use (defaults to project's backend)
-
-    Returns:
-        Dataset: The retrieved dataset
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    if backend == "ragas_app":
-        # Search for database with given ID
-        sync_version = async_to_sync(self._ragas_api_client.get_dataset)
-        dataset_info = sync_version(project_id=self.project_id, dataset_id=dataset_id)
-
-        # For now, return Dataset without model type
-        return Dataset(
-            name=dataset_info["name"],
-            model=model,
-            datatable_type="datasets",
-            project_id=self.project_id,
-            dataset_id=dataset_id,
-            ragas_api_client=self._ragas_api_client,
-            backend="ragas_app",
-        )
-    elif backend == "local":
-        # For local backend, this is not a typical operation since we use names
-        # We could maintain a mapping of IDs to names, but for now just raise an error
-        raise NotImplementedError(
-            "get_dataset_by_id is not implemented for local backend. "
-            "Use get_dataset with the dataset name instead."
-        )
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-
-# %% ../../nbs/api/project/datasets.ipynb 17
-@patch
-def get_dataset(
-    self: Project,
-    dataset_name: str,
-    model: t.Type[BaseModel],
-    backend: t.Optional[SUPPORTED_BACKENDS] = None,
-) -> Dataset:
-    """Get an existing dataset by name.
-
-    Args:
-        dataset_name: The name of the dataset to retrieve
-        model: The model class to use for the dataset entries
-        backend: The backend to use (defaults to project's backend if not specified)
-
-    Returns:
-        Dataset: The retrieved dataset
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    if backend == "ragas_app":
-        # Search for dataset with given name
-        sync_version = async_to_sync(self._ragas_api_client.get_dataset_by_name)
-        dataset_info = sync_version(
-            project_id=self.project_id, dataset_name=dataset_name
-        )
-
-        # Return Dataset instance
-        return Dataset(
-            name=dataset_info["name"],
-            model=model,
-            datatable_type="datasets",
-            project_id=self.project_id,
-            dataset_id=dataset_info["id"],
-            ragas_api_client=self._ragas_api_client,
-            backend="ragas_app",
-        )
-    elif backend == "local":
-        # Check if the dataset file exists
-        dataset_path = self.get_dataset_path(dataset_name)
-        if not os.path.exists(dataset_path):
-            raise ValueError(f"Dataset '{dataset_name}' does not exist")
-
-        # Create dataset instance with a random ID
-        dataset_id = create_nano_id()
-
-        # Return Dataset instance
-        return Dataset(
-            name=dataset_name,
-            model=model,
-            datatable_type="datasets",
-            project_id=self.project_id,
-            dataset_id=dataset_id,
-            backend="local",
-            local_root_dir=os.path.dirname(self._root_dir),  # Root dir for all projects
-        )
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-
-# %% ../../nbs/api/project/datasets.ipynb 18
-@patch
-def list_dataset_names(
-    self: Project, backend: t.Optional[SUPPORTED_BACKENDS] = None
-) -> t.List[str]:
-    """List all datasets in the project.
-
-    Args:
-        backend: The backend to use (defaults to project's backend)
-
-    Returns:
-        List[str]: Names of all datasets in the project
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    if backend == "ragas_app":
-        # Get all datasets from API
-        sync_version = async_to_sync(self._ragas_api_client.list_datasets)
-        datasets = sync_version(project_id=self.project_id)
-        return [dataset["name"] for dataset in datasets]
-    elif backend == "local":
-        # Get all CSV files in the datasets directory
-        datasets_dir = os.path.join(self._root_dir, "datasets")
-        if not os.path.exists(datasets_dir):
-            return []
-
-        return [
-            os.path.splitext(f)[0]
-            for f in os.listdir(datasets_dir)
-            if f.endswith(".csv")
-        ]
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
diff --git a/experimental/ragas_experimental/project/decorators.py b/experimental/ragas_experimental/project/decorators.py
new file mode 100644
index 000000000..c2bafad6e
--- /dev/null
+++ b/experimental/ragas_experimental/project/decorators.py
@@ -0,0 +1,333 @@
+"""Experiment decorators for running and tracking experiments."""
+
+import asyncio
+import os
+import typing as t
+from functools import wraps
+from pathlib import Path
+
+import git
+from tqdm import tqdm
+
+from ..dataset import Dataset
+from ..utils import async_to_sync
+from .utils import memorable_names
+
+
+@t.runtime_checkable
+class ExperimentProtocol(t.Protocol):
+    async def __call__(self, *args, **kwargs): ...
+    async def run_async(
+        self, dataset: Dataset, name: t.Optional[str] = None, **kwargs
+    ): ...
+
+
+def find_git_root(start_path: t.Union[str, Path, None] = None) -> Path:
+    """Find the root directory of a git repository by traversing up from the start path."""
+    # Start from the current directory if no path is provided
+    if start_path is None:
+        start_path = Path.cwd()
+    else:
+        start_path = Path(start_path).resolve()
+
+    # Check if the current directory is a git repository
+    current_path = start_path
+    while current_path != current_path.parent:  # Stop at filesystem root
+        if (current_path / ".git").exists() and (current_path / ".git").is_dir():
+            return current_path
+
+        # Move up to the parent directory
+        current_path = current_path.parent
+
+    # Final check for the root directory
+    if (current_path / ".git").exists() and (current_path / ".git").is_dir():
+        return current_path
+
+    # No git repository found
+    raise ValueError(f"No git repository found in or above {start_path}")
+
+
+def version_experiment(
+    experiment_name: str,
+    commit_message: t.Optional[str] = None,
+    repo_path: t.Union[str, Path, None] = None,
+    create_branch: bool = True,
+    stage_all: bool = False,
+) -> str:
+    """Version control the current state of the codebase for an experiment."""
+    # Default to current directory if no repo path is provided
+    if repo_path is None:
+        repo_path = find_git_root()
+
+    # Initialize git repo object
+    repo = git.Repo(repo_path)
+
+    # Check if there are any changes to the repo
+    has_changes = False
+    if stage_all and repo.is_dirty(untracked_files=True):
+        print("Staging all changes")
+        repo.git.add(".")
+        has_changes = True
+    elif repo.is_dirty(untracked_files=False):
+        print("Staging changes to tracked files")
+        repo.git.add("-u")
+        has_changes = True
+
+    # Check if there are uncommitted changes
+    if has_changes:
+        # Default commit message if none provided
+        if commit_message is None:
+            commit_message = f"Experiment: {experiment_name}"
+
+        # Commit changes
+        commit = repo.index.commit(commit_message)
+        commit_hash = commit.hexsha
+        print(f"Changes committed with hash: {commit_hash[:8]}")
+    else:
+        # No changes to commit, use current HEAD
+        commit_hash = repo.head.commit.hexsha
+        print("No changes detected, nothing to commit")
+
+    # Format the branch/tag name
+    version_name = f"ragas/{experiment_name}"
+
+    # Create branch if requested
+    if create_branch:
+        repo.create_head(version_name, commit_hash)
+        print(f"Created branch: {version_name}")
+
+    return commit_hash
+
+
+class ExperimentDecorator:
+    """Base class for experiment decorators that adds methods to Project instances."""
+
+    def __init__(self, project):
+        self.project = project
+
+    def experiment(
+        self,
+        experiment_model,
+        name_prefix: str = "",
+        save_to_git: bool = False,
+        stage_all: bool = False,
+    ):
+        """Decorator for creating experiment functions.
+
+        Args:
+            experiment_model: The model type to use for experiment results
+            name_prefix: Optional prefix for experiment names
+            save_to_git: Whether to save experiment state to git
+            stage_all: Whether to stage all files when saving to git
+
+        Returns:
+            Decorator function that wraps experiment functions
+        """
+
+        def decorator(func: t.Callable) -> ExperimentProtocol:
+            @wraps(func)
+            async def wrapped_experiment(*args, **kwargs):
+                # Simply call the function
+                return await func(*args, **kwargs)
+
+            # Add run method to the wrapped function
+            async def run_async(
+                dataset: Dataset,
+                name: t.Optional[str] = None,
+                save_to_git: bool = save_to_git,
+                stage_all: bool = stage_all,
+            ):
+                # If name is not provided, generate a memorable name
+                if name is None:
+                    name = memorable_names.generate_unique_name()
+                if name_prefix:
+                    name = f"{name_prefix}-{name}"
+
+                experiment_view = None
+                try:
+                    # Create the experiment view
+                    experiment_view = self.project.create_experiment(
+                        name=name, model=experiment_model
+                    )
+
+                    # Create tasks for all items
+                    tasks = []
+                    for item in dataset:
+                        tasks.append(wrapped_experiment(item))
+
+                    # Calculate total operations (processing + appending)
+                    total_operations = (
+                        len(tasks) * 2
+                    )  # Each item requires processing and appending
+
+                    # Use tqdm for combined progress tracking
+                    results = []
+                    progress_bar = tqdm(
+                        total=total_operations, desc="Running experiment"
+                    )
+
+                    # Process all items
+                    for future in asyncio.as_completed(tasks):
+                        result = await future
+                        if result is not None:
+                            results.append(result)
+                        progress_bar.update(1)  # Update for task completion
+
+                    # Append results to experiment view
+                    for result in results:
+                        experiment_view.append(result)
+                        progress_bar.update(1)  # Update for append operation
+
+                    progress_bar.close()
+
+                except Exception as e:
+                    # Clean up the experiment if there was an error and it was created
+                    if experiment_view is not None:
+                        try:
+                            # For platform backend, delete via API
+                            if hasattr(self.project._backend, "ragas_api_client"):
+                                sync_version = async_to_sync(
+                                    self.project._backend.ragas_api_client.delete_experiment
+                                )
+                                sync_version(
+                                    project_id=self.project.project_id,
+                                    experiment_id=experiment_view.experiment_id,
+                                )
+                            else:
+                                # For local backend, delete the file
+                                experiment_path = self.project.get_experiment_path(
+                                    experiment_view.name
+                                )
+                                if os.path.exists(experiment_path):
+                                    os.remove(experiment_path)
+                        except Exception as cleanup_error:
+                            print(
+                                f"Failed to clean up experiment after error: {cleanup_error}"
+                            )
+
+                    # Re-raise the original exception
+                    raise e
+
+                # Save to git if requested
+                if save_to_git:
+                    repo_path = find_git_root()
+                    version_experiment(
+                        experiment_name=name, repo_path=repo_path, stage_all=stage_all
+                    )
+
+                return experiment_view
+
+            wrapped_experiment.__setattr__("run_async", run_async)
+            return t.cast(ExperimentProtocol, wrapped_experiment)
+
+        return decorator
+
+    def langfuse_experiment(
+        self,
+        experiment_model,
+        name_prefix: str = "",
+        save_to_git: bool = True,
+        stage_all: bool = True,
+    ):
+        """Decorator for creating experiment functions with Langfuse integration.
+
+        Args:
+            experiment_model: The model type to use for experiment results
+            name_prefix: Optional prefix for experiment names
+            save_to_git: Whether to save experiment state to git
+            stage_all: Whether to stage all files when saving to git
+
+        Returns:
+            Decorator function that wraps experiment functions with Langfuse observation
+        """
+        try:
+            from langfuse.decorators import observe
+        except ImportError:
+            raise ImportError(
+                "langfuse package is required for langfuse_experiment decorator"
+            )
+
+        def decorator(func: t.Callable) -> ExperimentProtocol:
+            @wraps(func)
+            async def langfuse_wrapped_func(*args, **kwargs):
+                # Apply langfuse observation directly here
+                trace_name = (
+                    f"{name_prefix}-{func.__name__}" if name_prefix else func.__name__
+                )
+                observed_func = observe(name=trace_name)(func)
+                return await observed_func(*args, **kwargs)
+
+            # Now create the experiment wrapper with our already-observed function
+            experiment_wrapper = self.experiment(
+                experiment_model, name_prefix, save_to_git, stage_all
+            )(langfuse_wrapped_func)
+
+            return t.cast(ExperimentProtocol, experiment_wrapper)
+
+        return decorator
+
+    def mlflow_experiment(
+        self,
+        experiment_model,
+        name_prefix: str = "",
+        save_to_git: bool = True,
+        stage_all: bool = True,
+    ):
+        """Decorator for creating experiment functions with MLflow integration.
+
+        Args:
+            experiment_model: The model type to use for experiment results
+            name_prefix: Optional prefix for experiment names
+            save_to_git: Whether to save experiment state to git
+            stage_all: Whether to stage all files when saving to git
+
+        Returns:
+            Decorator function that wraps experiment functions with MLflow observation
+        """
+        try:
+            from mlflow import trace
+        except ImportError:
+            raise ImportError(
+                "mlflow package is required for mlflow_experiment decorator"
+            )
+
+        def decorator(func: t.Callable) -> ExperimentProtocol:
+            @wraps(func)
+            async def mlflow_wrapped_func(*args, **kwargs):
+                # Apply mlflow observation directly here
+                trace_name = (
+                    f"{name_prefix}-{func.__name__}" if name_prefix else func.__name__
+                )
+                observed_func = trace(name=trace_name)(func)
+                return await observed_func(*args, **kwargs)
+
+            # Now create the experiment wrapper with our already-observed function
+            experiment_wrapper = self.experiment(
+                experiment_model, name_prefix, save_to_git, stage_all
+            )(mlflow_wrapped_func)
+
+            return t.cast(ExperimentProtocol, experiment_wrapper)
+
+        return decorator
+
+
+def add_experiment_decorators(project):
+    """Add experiment decorator methods to a Project instance.
+
+    This function dynamically adds the experiment decorator methods to a Project instance,
+    maintaining the same interface as the @patch decorators but without using fastcore.
+
+    Args:
+        project: Project instance to add decorators to
+
+    Returns:
+        The project instance with added decorator methods
+    """
+    decorator_instance = ExperimentDecorator(project)
+
+    # Add decorator methods to the project instance
+    project.experiment = decorator_instance.experiment
+    project.langfuse_experiment = decorator_instance.langfuse_experiment
+    project.mlflow_experiment = decorator_instance.mlflow_experiment
+
+    return project
diff --git a/experimental/ragas_experimental/project/experiments.py b/experimental/ragas_experimental/project/experiments.py
deleted file mode 100644
index 54db2db6d..000000000
--- a/experimental/ragas_experimental/project/experiments.py
+++ /dev/null
@@ -1,810 +0,0 @@
-"""How to run experiments"""
-
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/project/experiments.ipynb.
-
-# %% auto 0
-__all__ = ['memorable_names', 'create_experiment_columns', 'get_experiment_from_local', 'get_experiment_from_ragas_app',
-           'find_git_root', 'version_experiment', 'cleanup_experiment_branches', 'ExperimentProtocol']
-
-# %% ../../nbs/api/project/experiments.ipynb 2
-from functools import wraps
-import asyncio
-import typing as t
-import os
-
-from fastcore.utils import patch
-from tqdm import tqdm
-
-from .core import Project
-from ragas_experimental.model.pydantic_model import (
-    ExtendedPydanticBaseModel as BaseModel,
-)
-from ..utils import async_to_sync, create_nano_id
-from ..dataset import Dataset, BaseModelType
-from ..experiment import Experiment
-import ragas_experimental.typing as rt
-
-# %% ../../nbs/api/project/experiments.ipynb 4
-# Add this helper function similar to create_dataset_columns in core.ipynb
-async def create_experiment_columns(
-    project_id, experiment_id, columns, create_experiment_column_func
-):
-    tasks = []
-    for column in columns:
-        tasks.append(
-            create_experiment_column_func(
-                project_id=project_id,
-                experiment_id=experiment_id,
-                id=create_nano_id(),
-                name=column["name"],
-                type=column["type"],
-                settings=column["settings"],
-            )
-        )
-    return await asyncio.gather(*tasks)
-
-# %% ../../nbs/api/project/experiments.ipynb 5
-def get_experiment_from_local(
-    self: Project, name: str, model: t.Type[BaseModel]
-) -> Experiment:
-    """Create an experiment in the local filesystem backend.
-
-    Args:
-        name: Name of the experiment
-        model: Model class defining the experiment structure
-
-    Returns:
-        Experiment: A new experiment configured to use the local backend
-    """
-    # Use a UUID as the experiment ID
-    experiment_id = create_nano_id()
-
-    # Return a new Experiment instance with local backend
-    return Experiment(
-        name=name,
-        model=model,
-        project_id=self.project_id,
-        experiment_id=experiment_id,
-        backend="local",
-        local_root_dir=os.path.dirname(self._root_dir),  # Root dir for all projects
-    )
-
-# %% ../../nbs/api/project/experiments.ipynb 6
-def get_experiment_from_ragas_app(
-    self: Project, name: str, model: t.Type[BaseModel]
-) -> Experiment:
-    """Create an experiment in the Ragas App backend.
-
-    Args:
-        name: Name of the experiment
-        model: Model class defining the experiment structure
-
-    Returns:
-        Experiment: A new experiment configured to use the ragas_app backend
-    """
-    # Create the experiment in the API
-    sync_version = async_to_sync(self._ragas_api_client.create_experiment)
-    experiment_info = sync_version(
-        project_id=self.project_id,
-        name=name,
-    )
-
-    # Create the columns for the experiment
-    column_types = rt.ModelConverter.model_to_columns(model)
-    sync_version = async_to_sync(create_experiment_columns)
-    sync_version(
-        project_id=self.project_id,
-        experiment_id=experiment_info["id"],
-        columns=column_types,
-        create_experiment_column_func=self._ragas_api_client.create_experiment_column,
-    )
-
-    # Return a new Experiment instance with ragas_app backend
-    return Experiment(
-        name=name,
-        model=model,
-        project_id=self.project_id,
-        experiment_id=experiment_info["id"],
-        ragas_api_client=self._ragas_api_client,
-        backend="ragas_app",
-    )
-
-# %% ../../nbs/api/project/experiments.ipynb 8
-@patch
-def create_experiment(
-    self: Project,
-    name: str,
-    model: t.Type[BaseModel],
-    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None,
-) -> Experiment:
-    """Create a new experiment.
-
-    Args:
-        name: Name of the experiment
-        model: Model class defining the experiment structure
-        backend: The backend to use (defaults to project's backend if not specified)
-
-    Returns:
-        Experiment: An experiment object for managing results
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    # Create experiment using the appropriate backend
-    if backend == "local":
-        return get_experiment_from_local(self, name, model)
-    elif backend == "ragas_app":
-        return get_experiment_from_ragas_app(self, name, model)
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-
-# %% ../../nbs/api/project/experiments.ipynb 9
-@patch
-def get_experiment_path(self: Project, experiment_name: str) -> str:
-    """Get the filesystem path for an experiment.
-
-    Args:
-        experiment_name: The name of the experiment
-
-    Returns:
-        str: The absolute path to the experiment CSV file
-    """
-    # Create path relative to project root
-    return os.path.join(self._root_dir, "experiments", f"{experiment_name}.csv")
-
-# %% ../../nbs/api/project/experiments.ipynb 12
-@patch
-def get_experiment_by_id(
-    self: Project,
-    experiment_id: str,
-    model: t.Type[BaseModel],
-    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None,
-) -> Experiment:
-    """Get an existing experiment by ID.
-
-    Args:
-        experiment_id: The ID of the experiment to retrieve
-        model: The model class to use for the experiment results
-        backend: The backend to use (defaults to project's backend)
-
-    Returns:
-        Experiment: The retrieved experiment
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    if backend == "ragas_app":
-        # Get experiment info from API
-        sync_version = async_to_sync(self._ragas_api_client.get_experiment)
-        experiment_info = sync_version(
-            project_id=self.project_id, experiment_id=experiment_id
-        )
-
-        # Return Experiment instance with ragas_app backend
-        return Experiment(
-            name=experiment_info["name"],
-            model=model,
-            project_id=self.project_id,
-            experiment_id=experiment_id,
-            ragas_api_client=self._ragas_api_client,
-            backend="ragas_app",
-        )
-    elif backend == "local":
-        # For local backend, this is not a typical operation since we use names
-        # We could maintain a mapping of IDs to names, but for now just raise an error
-        raise NotImplementedError(
-            "get_experiment_by_id is not implemented for local backend. "
-            "Use get_experiment with the experiment name instead."
-        )
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-
-# %% ../../nbs/api/project/experiments.ipynb 13
-@patch
-def list_experiment_names(
-    self: Project, backend: t.Optional[rt.SUPPORTED_BACKENDS] = None
-) -> t.List[str]:
-    """List all experiments in the project.
-
-    Args:
-        backend: The backend to use (defaults to project's backend)
-
-    Returns:
-        List[str]: Names of all experiments in the project
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    if backend == "ragas_app":
-        # Get all experiments from API
-        sync_version = async_to_sync(self._ragas_api_client.list_experiments)
-        experiments = sync_version(project_id=self.project_id)
-        return [experiment["name"] for experiment in experiments]
-    elif backend == "local":
-        # Get all CSV files in the experiments directory
-        experiments_dir = os.path.join(self._root_dir, "experiments")
-        if not os.path.exists(experiments_dir):
-            return []
-
-        return [
-            os.path.splitext(f)[0]
-            for f in os.listdir(experiments_dir)
-            if f.endswith(".csv")
-        ]
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-
-# %% ../../nbs/api/project/experiments.ipynb 16
-@patch
-def get_experiment(
-    self: Project,
-    experiment_name: str,
-    model: t.Type[BaseModel],
-    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None,
-) -> Experiment:
-    """Get an existing experiment by name.
-
-    Args:
-        experiment_name: The name of the experiment to retrieve
-        model: The model class to use for the experiment results
-        backend: The backend to use (defaults to project's backend if not specified)
-
-    Returns:
-        Experiment: The retrieved experiment
-    """
-    # If backend is not specified, use the project's backend
-    if backend is None:
-        backend = self.backend
-
-    if backend == "ragas_app":
-        # Search for experiment with given name
-        sync_version = async_to_sync(self._ragas_api_client.get_experiment_by_name)
-        experiment_info = sync_version(
-            project_id=self.project_id, experiment_name=experiment_name
-        )
-
-        # Return Experiment instance with ragas_app backend
-        return Experiment(
-            name=experiment_info["name"],
-            model=model,
-            project_id=self.project_id,
-            experiment_id=experiment_info["id"],
-            ragas_api_client=self._ragas_api_client,
-            backend="ragas_app",
-        )
-    elif backend == "local":
-        # Check if the experiment file exists
-        experiment_path = self.get_experiment_path(experiment_name)
-        if not os.path.exists(experiment_path):
-            raise ValueError(f"Experiment '{experiment_name}' does not exist")
-
-        # Create experiment instance with a random ID
-        experiment_id = create_nano_id()
-
-        # Return Experiment instance with local backend
-        return Experiment(
-            name=experiment_name,
-            model=model,
-            project_id=self.project_id,
-            experiment_id=experiment_id,
-            backend="local",
-            local_root_dir=os.path.dirname(self._root_dir),  # Root dir for all projects
-        )
-    else:
-        raise ValueError(f"Unsupported backend: {backend}")
-
-# %% ../../nbs/api/project/experiments.ipynb 19
-import git
-from pathlib import Path
-
-# %% ../../nbs/api/project/experiments.ipynb 20
-def find_git_root(
-    start_path: t.Union[str, Path, None] = None  # starting path to search from
-) -> Path:
-    """Find the root directory of a git repository by traversing up from the start path."""
-    # Start from the current directory if no path is provided
-    if start_path is None:
-        start_path = Path.cwd()
-    else:
-        start_path = Path(start_path).resolve()
-
-    # Check if the current directory is a git repository
-    current_path = start_path
-    while current_path != current_path.parent:  # Stop at filesystem root
-        if (current_path / ".git").exists() and (current_path / ".git").is_dir():
-            return current_path
-
-        # Move up to the parent directory
-        current_path = current_path.parent
-
-    # Final check for the root directory
-    if (current_path / ".git").exists() and (current_path / ".git").is_dir():
-        return current_path
-
-    # No git repository found
-    raise ValueError(f"No git repository found in or above {start_path}")
-
-# %% ../../nbs/api/project/experiments.ipynb 23
-def version_experiment(
-    experiment_name: str,
-    commit_message: t.Optional[str] = None,
-    repo_path: t.Union[str, Path, None] = None,
-    create_branch: bool = True,
-    stage_all: bool = False,
-) -> str:
-    "Version control the current state of the codebase for an experiment."
-    # Default to current directory if no repo path is provided
-    if repo_path is None:
-        repo_path = find_git_root()
-
-    # Initialize git repo object
-    repo = git.Repo(repo_path)
-
-    # check if there are any changes to the repo
-    has_changes = False
-    if stage_all and repo.is_dirty(untracked_files=True):
-        print("Staging all changes")
-        repo.git.add(".")
-        has_changes = True
-    elif repo.is_dirty(untracked_files=False):
-        print("Staging changes to tracked files")
-        repo.git.add("-u")
-        has_changes = True
-
-    # Check if there are uncommitted changes
-    if has_changes:
-        # Default commit message if none provided
-        if commit_message is None:
-            commit_message = f"Experiment: {experiment_name}"
-
-        # Commit changes
-        commit = repo.index.commit(commit_message)
-        commit_hash = commit.hexsha
-        print(f"Changes committed with hash: {commit_hash[:8]}")
-    else:
-        # No changes to commit, use current HEAD
-        commit_hash = repo.head.commit.hexsha
-        print("No changes detected, nothing to commit")
-
-    # Format the branch/tag name
-    version_name = f"ragas/{experiment_name}"
-
-    # Create branch if requested
-    if create_branch:
-        branch = repo.create_head(version_name, commit_hash)
-        print(f"Created branch: {version_name}")
-
-    return commit_hash
-
-# %% ../../nbs/api/project/experiments.ipynb 24
-def cleanup_experiment_branches(
-    prefix: str = "ragas/",
-    repo_path: t.Union[str, Path, None] = None,
-    interactive: bool = True,
-    dry_run: bool = False,
-) -> t.List[str]:
-    """Clean up git branches with the specified prefix."""
-    # Find the git repository root if not provided
-    if repo_path is None:
-        try:
-            repo_path = find_git_root()
-        except ValueError as e:
-            raise ValueError(f"Cannot cleanup branches: {str(e)}")
-
-    # Initialize git repo object
-    repo = git.Repo(repo_path)
-    current_branch = repo.active_branch.name
-
-    # Get all branches matching the prefix
-    matching_branches = []
-    for branch in repo.branches:
-        if branch.name.startswith(prefix):
-            matching_branches.append(branch.name)
-
-    if not matching_branches:
-        print(f"No branches found with prefix '{prefix}'")
-        return []
-
-    # Remove current branch from the list if present
-    if current_branch in matching_branches:
-        print(f"Note: Current branch '{current_branch}' will be excluded from deletion")
-        matching_branches.remove(current_branch)
-
-    if not matching_branches:
-        print("No branches available for deletion after excluding current branch")
-        return []
-
-    # Show branches to the user
-    print(f"Found {len(matching_branches)} branches with prefix '{prefix}':")
-    for branch_name in matching_branches:
-        print(f"- {branch_name}")
-
-    # Handle confirmation in interactive mode
-    proceed = True
-    if interactive and not dry_run:
-        confirm = (
-            input(f"\nDelete these {len(matching_branches)} branches? (y/n): ")
-            .strip()
-            .lower()
-        )
-        proceed = confirm == "y"
-
-    if not proceed:
-        print("Operation cancelled")
-        return []
-
-    # Perform deletion
-    deleted_branches = []
-    for branch_name in matching_branches:
-        if dry_run:
-            print(f"Would delete branch: {branch_name}")
-            deleted_branches.append(branch_name)
-        else:
-            try:
-                # Delete the branch
-                repo.git.branch("-D", branch_name)
-                print(f"Deleted branch: {branch_name}")
-                deleted_branches.append(branch_name)
-            except git.GitCommandError as e:
-                print(f"Error deleting branch '{branch_name}': {str(e)}")
-
-    if dry_run:
-        print(f"\nDry run complete. {len(deleted_branches)} branches would be deleted.")
-    else:
-        print(f"\nCleanup complete. {len(deleted_branches)} branches deleted.")
-
-    return deleted_branches
-
-# %% ../../nbs/api/project/experiments.ipynb 27
-@t.runtime_checkable
-class ExperimentProtocol(t.Protocol):
-    async def __call__(self, *args, **kwargs): ...
-    async def run_async(self, name: str, dataset: Dataset): ...
-
-# %% ../../nbs/api/project/experiments.ipynb 28
-from .naming import MemorableNames
-
-# %% ../../nbs/api/project/experiments.ipynb 29
-memorable_names = MemorableNames()
-
-# %% ../../nbs/api/project/experiments.ipynb 30
-@patch
-def experiment(
-    self: Project,
-    experiment_model,
-    name_prefix: str = "",
-    save_to_git: bool = False,
-    stage_all: bool = False,
-    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None,
-):
-    """Decorator for creating experiment functions.
-
-    Args:
-        experiment_model: The model type to use for experiment results
-        name_prefix: Optional prefix for experiment names
-        save_to_git: Whether to save experiment state to git
-        stage_all: Whether to stage all files when saving to git
-        backend: Backend to use for this experiment (overrides project's backend)
-
-    Returns:
-        Decorator function that wraps experiment functions
-    """
-
-    def decorator(func: t.Callable) -> ExperimentProtocol:
-        @wraps(func)
-        async def wrapped_experiment(*args, **kwargs):
-            # Simply call the function
-            return await func(*args, **kwargs)
-
-        # Add run method to the wrapped function
-        async def run_async(
-            dataset: Dataset,
-            name: t.Optional[str] = None,
-            save_to_git: bool = save_to_git,
-            stage_all: bool = stage_all,
-            backend: t.Optional[rt.SUPPORTED_BACKENDS] = backend,
-        ):
-            # If name is not provided, generate a memorable name
-            if name is None:
-                name = memorable_names.generate_unique_name()
-            if name_prefix:
-                name = f"{name_prefix}-{name}"
-
-            # Determine which backend to use (parameter > decorator > project default)
-            effective_backend = backend if backend is not None else self.backend
-
-            experiment_view = None
-            try:
-                # Create the experiment view using the specified backend
-                experiment_view = self.create_experiment(
-                    name=name, model=experiment_model, backend=effective_backend
-                )
-
-                # Create tasks for all items
-                tasks = []
-                for item in dataset:
-                    tasks.append(wrapped_experiment(item))
-
-                # Calculate total operations (processing + appending)
-                total_operations = (
-                    len(tasks) * 2
-                )  # Each item requires processing and appending
-
-                # Use tqdm for combined progress tracking
-                results = []
-                progress_bar = tqdm(total=total_operations, desc="Running experiment")
-
-                # Process all items
-                for future in asyncio.as_completed(tasks):
-                    result = await future
-                    if result is not None:
-                        results.append(result)
-                    progress_bar.update(1)  # Update for task completion
-
-                # Append results to experiment view
-                for result in results:
-                    experiment_view.append(result)
-                    progress_bar.update(1)  # Update for append operation
-
-                progress_bar.close()
-
-            except Exception as e:
-                # Clean up the experiment if there was an error and it was created
-                if experiment_view is not None:
-                    try:
-                        if effective_backend == "ragas_app" and hasattr(
-                            self, "_ragas_api_client"
-                        ):
-                            # Delete the experiment in Ragas App
-                            sync_version = async_to_sync(
-                                self._ragas_api_client.delete_experiment
-                            )
-                            sync_version(
-                                project_id=self.project_id,
-                                experiment_id=experiment_view.experiment_id,
-                            )
-                        elif effective_backend == "local":
-                            # Delete the local file
-                            experiment_path = self.get_experiment_path(
-                                experiment_view.name
-                            )
-                            if os.path.exists(experiment_path):
-                                os.remove(experiment_path)
-                        # Could add more backend-specific cleanup here
-                    except Exception as cleanup_error:
-                        print(
-                            f"Failed to clean up experiment after error: {cleanup_error}"
-                        )
-
-                # Re-raise the original exception
-                raise e
-
-            # save to git if requested
-            if save_to_git:
-                repo_path = find_git_root()
-                version_experiment(
-                    experiment_name=name, repo_path=repo_path, stage_all=stage_all
-                )
-
-            return experiment_view
-
-        wrapped_experiment.__setattr__("run_async", run_async)
-        return t.cast(ExperimentProtocol, wrapped_experiment)
-
-    return decorator
-
-# %% ../../nbs/api/project/experiments.ipynb 32
-@patch
-def langfuse_experiment(
-    self: Project,
-    experiment_model,
-    name_prefix: str = "",
-    save_to_git: bool = True,
-    stage_all: bool = True,
-):
-    """Decorator for creating experiment functions with Langfuse integration.
-
-    Args:
-        experiment_model: The NotionModel type to use for experiment results
-        name_prefix: Optional prefix for experiment names
-        save_to_git: Whether to save the experiment state to git
-        stage_all: Whether to stage all files when saving to git
-
-    Returns:
-        Decorator function that wraps experiment functions with Langfuse observation
-    """
-    # Use the project's backend as the source of truth
-    backend = self.backend
-
-    def decorator(func: t.Callable) -> ExperimentProtocol:
-        @wraps(func)
-        async def langfuse_wrapped_func(*args, **kwargs):
-            # Apply langfuse observation directly here
-            trace_name = (
-                f"{name_prefix}-{func.__name__}" if name_prefix else func.__name__
-            )
-            observed_func = observe(name=trace_name)(func)
-            return await observed_func(*args, **kwargs)
-
-        # Now create the experiment wrapper with our already-observed function
-        experiment_wrapper = self.experiment(
-            experiment_model, name_prefix, save_to_git, stage_all
-        )(langfuse_wrapped_func)
-
-        return t.cast(ExperimentProtocol, experiment_wrapper)
-
-    return decorator
-
-# %% ../../nbs/api/project/experiments.ipynb 33
-# this one we have to clean up
-from langfuse.decorators import observe
-
-# %% ../../nbs/api/project/experiments.ipynb 34
-@patch
-def langfuse_experiment(
-    self: Project,
-    experiment_model,
-    name_prefix: str = "",
-    save_to_git: bool = True,
-    stage_all: bool = True,
-    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None,
-):
-    """Decorator for creating experiment functions with Langfuse integration.
-
-    Args:
-        experiment_model: The model type to use for experiment results
-        name_prefix: Optional prefix for experiment names
-        save_to_git: Whether to save experiment state to git
-        stage_all: Whether to stage all files when saving to git
-        backend: Backend to use for this experiment (overrides project's backend)
-
-    Returns:
-        Decorator function that wraps experiment functions with Langfuse observation
-    """
-
-    def decorator(func: t.Callable) -> ExperimentProtocol:
-        @wraps(func)
-        async def langfuse_wrapped_func(*args, **kwargs):
-            # Apply langfuse observation directly here
-            trace_name = (
-                f"{name_prefix}-{func.__name__}" if name_prefix else func.__name__
-            )
-            observed_func = observe(name=trace_name)(func)
-            return await observed_func(*args, **kwargs)
-
-        # Now create the experiment wrapper with our already-observed function
-        experiment_wrapper = self.experiment(
-            experiment_model, name_prefix, save_to_git, stage_all, backend=backend
-        )(langfuse_wrapped_func)
-
-        return t.cast(ExperimentProtocol, experiment_wrapper)
-
-    return decorator
-
-# %% ../../nbs/api/project/experiments.ipynb 38
-@patch
-def mlflow_experiment(
-    self: Project,
-    experiment_model,
-    name_prefix: str = "",
-    save_to_git: bool = True,
-    stage_all: bool = True,
-):
-    """Decorator for creating experiment functions with mlflow integration.
-
-    Args:
-        experiment_model: The NotionModel type to use for experiment results
-        name_prefix: Optional prefix for experiment names
-        save_to_git: Whether to save the experiment state to git
-        stage_all: Whether to stage all files when saving to git
-
-    Returns:
-        Decorator function that wraps experiment functions with mlflow observation
-    """
-    # Use the project's backend as the source of truth
-    backend = self.backend
-
-    def decorator(func: t.Callable) -> ExperimentProtocol:
-
-        @wraps(func)
-        async def mlflow_wrapped_func(*args, **kwargs):
-            # Apply mlflow observation directly here
-            trace_name = (
-                f"{name_prefix}-{func.__name__}" if name_prefix else func.__name__
-            )
-            observed_func = trace(name=trace_name)(func)
-            return await observed_func(*args, **kwargs)
-
-        # Now create the experiment wrapper with our already-observed function
-        experiment_wrapper = self.experiment(
-            experiment_model, name_prefix, save_to_git, stage_all
-        )(mlflow_wrapped_func)
-
-        return t.cast(ExperimentProtocol, experiment_wrapper)
-
-    return decorator
-
-# %% ../../nbs/api/project/experiments.ipynb 42
-from mlflow import trace
-
-
-@patch
-def mlflow_experiment(
-    self: Project,
-    experiment_model,
-    name_prefix: str = "",
-    save_to_git: bool = True,
-    stage_all: bool = True,
-    backend: t.Optional[rt.SUPPORTED_BACKENDS] = None,
-):
-    """Decorator for creating experiment functions with mlflow integration.
-
-    Args:
-        experiment_model: The model type to use for experiment results
-        name_prefix: Optional prefix for experiment names
-        save_to_git: Whether to save experiment state to git
-        stage_all: Whether to stage all files when saving to git
-        backend: Backend to use for this experiment (overrides project's backend)
-
-    Returns:
-        Decorator function that wraps experiment functions with mlflow observation
-    """
-
-    def decorator(func: t.Callable) -> ExperimentProtocol:
-
-        @wraps(func)
-        async def mlflow_wrapped_func(*args, **kwargs):
-            # Apply mlflow observation directly here
-            trace_name = (
-                f"{name_prefix}-{func.__name__}" if name_prefix else func.__name__
-            )
-            observed_func = trace(name=trace_name)(func)
-            return await observed_func(*args, **kwargs)
-
-        # Now create the experiment wrapper with our already-observed function
-        experiment_wrapper = self.experiment(
-            experiment_model, name_prefix, save_to_git, stage_all, backend=backend
-        )(mlflow_wrapped_func)
-
-        return t.cast(ExperimentProtocol, experiment_wrapper)
-
-    return decorator
-
-# %% ../../nbs/api/project/experiments.ipynb 43
-import logging
-from ..utils import plot_experiments_as_subplots
-
-
-@patch
-def compare_and_plot(
-    self: Project,
-    experiment_names: t.List[str],
-    model: t.Type[BaseModel],
-    metric_names: t.List[str],
-):
-    """Compare multiple experiments and generate a plot.
-
-    Args:
-        experiment_names: List of experiment IDs to compare
-        model: Model class defining the experiment structure
-    """
-    results = {}
-    for experiment_name in tqdm(experiment_names, desc="Fetching experiments"):
-        experiment = self.get_experiment(experiment_name, model)
-        experiment.load()
-        results[experiment_name] = {}
-        for row in experiment:
-            for metric in metric_names:
-                if metric not in results[experiment_name]:
-                    results[experiment_name][metric] = []
-                if hasattr(row, metric):
-                    results[experiment_name][metric].append(getattr(row, metric))
-                else:
-                    results[metric].append(None)
-                    logging.warning(f"Metric {metric} not found in row: {row}")
-
-    fig = plot_experiments_as_subplots(results, experiment_ids=experiment_names)
-    fig.show()
diff --git a/experimental/ragas_experimental/project/naming.py b/experimental/ragas_experimental/project/utils.py
similarity index 85%
rename from experimental/ragas_experimental/project/naming.py
rename to experimental/ragas_experimental/project/utils.py
index 15b63db33..63cebf72e 100644
--- a/experimental/ragas_experimental/project/naming.py
+++ b/experimental/ragas_experimental/project/utils.py
@@ -1,15 +1,31 @@
-"""A helper module to create fun, memorable names for experiments, datasets or anything"""
+"""Shared utilities for project module."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/project/naming.ipynb.
+import random
+import string
+import uuid
 
-# %% auto 0
-__all__ = ['MemorableNames']
 
-# %% ../../nbs/api/project/naming.ipynb 2
-import random
+def create_nano_id(size=12):
+    """Create a short, URL-safe unique identifier."""
+    # Define characters to use (alphanumeric)
+    alphabet = string.ascii_letters + string.digits
+
+    # Generate UUID and convert to int
+    uuid_int = uuid.uuid4().int
+
+    # Convert to base62
+    result = ""
+    while uuid_int:
+        uuid_int, remainder = divmod(uuid_int, len(alphabet))
+        result = alphabet[remainder] + result
+
+    # Pad if necessary and return desired length
+    return result[:size]
+
 
-# %% ../../nbs/api/project/naming.ipynb 3
 class MemorableNames:
+    """Generator for memorable, unique names for experiments and datasets."""
+
     def __init__(self):
         # List of adjectives (similar to what Docker uses)
         self.adjectives = [
@@ -189,13 +205,13 @@ def __init__(self):
         self.used_names = set()
 
     def generate_name(self):
-        """Generate a single experiment name."""
+        """Generate a single memorable name."""
         adjective = random.choice(self.adjectives)
         scientist = random.choice(self.scientists)
         return f"{adjective}_{scientist}"
 
     def generate_unique_name(self):
-        """Generate a unique experiment name."""
+        """Generate a unique memorable name."""
         attempts = 0
         max_attempts = 100  # Prevent infinite loops
 
@@ -213,5 +229,9 @@ def generate_unique_name(self):
         return unique_name
 
     def generate_unique_names(self, count):
-        """Generate multiple unique experiment names."""
+        """Generate multiple unique memorable names."""
         return [self.generate_unique_name() for _ in range(count)]
+
+
+# Global instance for easy access
+memorable_names = MemorableNames()
diff --git a/experimental/ragas_experimental/prompt/__init__.py b/experimental/ragas_experimental/prompt/__init__.py
index 680fe354d..93d9b2e5a 100644
--- a/experimental/ragas_experimental/prompt/__init__.py
+++ b/experimental/ragas_experimental/prompt/__init__.py
@@ -1,5 +1,4 @@
 from ragas_experimental.prompt.base import Prompt
 from ragas_experimental.prompt.dynamic_few_shot import DynamicFewShotPrompt
 
-
-__all__ = ['Prompt', 'DynamicFewShotPrompt']
\ No newline at end of file
+__all__ = ["Prompt", "DynamicFewShotPrompt"]
diff --git a/experimental/ragas_experimental/prompt/base.py b/experimental/ragas_experimental/prompt/base.py
index caf1332fe..93d831175 100644
--- a/experimental/ragas_experimental/prompt/base.py
+++ b/experimental/ragas_experimental/prompt/base.py
@@ -1,11 +1,7 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/prompt/base.ipynb.
+__all__ = ["Prompt"]
 
-# %% auto 0
-__all__ = ['Prompt']
-
-# %% ../../nbs/api/prompt/base.ipynb 2
-import typing as t
 import re
+import typing as t
 
 
 class Prompt:
diff --git a/experimental/ragas_experimental/prompt/dynamic_few_shot.py b/experimental/ragas_experimental/prompt/dynamic_few_shot.py
index a459caed4..265a7cd41 100644
--- a/experimental/ragas_experimental/prompt/dynamic_few_shot.py
+++ b/experimental/ragas_experimental/prompt/dynamic_few_shot.py
@@ -1,15 +1,12 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/prompt/dynamic_few_shot.ipynb.
+__all__ = ["ExampleStore", "InMemoryExampleStore", "DynamicFewShotPrompt"]
 
-# %% auto 0
-__all__ = ['ExampleStore', 'InMemoryExampleStore', 'DynamicFewShotPrompt']
-
-# %% ../../nbs/api/prompt/dynamic_few_shot.ipynb 2
 import typing as t
-import numpy as np
 from abc import ABC, abstractmethod
 
-from .base import Prompt
+import numpy as np
+
 from ..embedding import BaseEmbedding
+from .base import Prompt
 
 
 class ExampleStore(ABC):
diff --git a/experimental/ragas_experimental/simulation.py b/experimental/ragas_experimental/simulation.py
index 20cb8ad2e..7163003ed 100644
--- a/experimental/ragas_experimental/simulation.py
+++ b/experimental/ragas_experimental/simulation.py
@@ -5,9 +5,10 @@
 for evaluating conversational AI systems.
 """
 
-import typing as t
 import inspect
 import re
+import typing as t
+
 from pydantic import BaseModel, Field
 
 from .llm.llm import RagasLLM
@@ -16,22 +17,28 @@
 
 class Message(BaseModel):
     """Represents a single message in a conversation."""
+
     role: t.Literal["user", "assistant"]
     content: t.Union[str, t.Dict[str, t.Any], t.List[t.Dict[str, t.Any]]]
 
 
 class ConversationHistory(BaseModel):
     """Represents the full conversation history."""
+
     messages: t.List[Message] = Field(default_factory=list)
-    
-    def add_message(self, role: t.Literal["user", "assistant"], content: t.Union[str, t.Dict[str, t.Any], t.List[t.Dict[str, t.Any]]]) -> None:
+
+    def add_message(
+        self,
+        role: t.Literal["user", "assistant"],
+        content: t.Union[str, t.Dict[str, t.Any], t.List[t.Dict[str, t.Any]]],
+    ) -> None:
         """Add a message to the conversation history."""
         self.messages.append(Message(role=role, content=content))
-    
+
     def get_last_message(self) -> t.Optional[Message]:
         """Get the last message in the conversation."""
         return self.messages[-1] if self.messages else None
-    
+
     def to_dict_list(self) -> t.List[t.Dict[str, t.Any]]:
         """Convert conversation history to a list of dictionaries."""
         return [{"role": msg.role, "content": msg.content} for msg in self.messages]
@@ -40,12 +47,12 @@ def to_dict_list(self) -> t.List[t.Dict[str, t.Any]]:
 def validate_agent_function(func: t.Callable) -> None:
     """
     Validate agent function signature and behavior.
-    
+
     Checks:
     1. Function accepts at least 2 parameters (query, history)
     2. Function can handle basic inputs without TypeError
     3. Function returns something (not None)
-    
+
     Supports flexible agent signatures for multimodal agents:
     - Input: text, images, mixed content
     - Output: str, dict with 'content' key, or any serializable type
@@ -53,21 +60,23 @@ def validate_agent_function(func: t.Callable) -> None:
     # 1. Signature validation
     sig = inspect.signature(func)
     params = list(sig.parameters.values())
-    
+
     if len(params) < 2:
-        raise ValueError(f"Agent function must accept at least 2 parameters (query, history), got {len(params)}")
-    
+        raise ValueError(
+            f"Agent function must accept at least 2 parameters (query, history), got {len(params)}"
+        )
+
     # 2. Test call with mock data
     try:
         mock_history = ConversationHistory()
         mock_history.add_message("user", "test query")
-        
+
         result = func("test query", mock_history)
-        
+
         # 3. Return type validation - just ensure it's not None
         if result is None:
             raise ValueError("Agent function cannot return None")
-        
+
     except TypeError as e:
         raise ValueError(f"Agent function signature invalid: {e}")
 
@@ -75,7 +84,7 @@ def validate_agent_function(func: t.Callable) -> None:
 def validate_stopping_criteria(func: t.Callable[[ConversationHistory], bool]) -> None:
     """
     Validate stopping criteria function signature and behavior.
-    
+
     Checks:
     1. Function accepts exactly 1 parameter: (history: ConversationHistory)
     2. Function returns boolean or boolean-convertible value
@@ -84,43 +93,48 @@ def validate_stopping_criteria(func: t.Callable[[ConversationHistory], bool]) ->
     # 1. Signature validation
     sig = inspect.signature(func)
     params = list(sig.parameters.values())
-    
+
     if len(params) != 1:
-        raise ValueError(f"Stopping criteria must accept exactly 1 parameter (history), got {len(params)}")
-    
+        raise ValueError(
+            f"Stopping criteria must accept exactly 1 parameter (history), got {len(params)}"
+        )
+
     # 2. Test call with mock data
     try:
         mock_history = ConversationHistory()
         mock_history.add_message("user", "test")
         mock_history.add_message("assistant", "response")
-        
+
         result = func(mock_history)
-        
+
         # 3. Return type validation
         if result is None:
             raise ValueError("Stopping criteria cannot return None")
-        
+
         # Ensure it's boolean convertible
         bool(result)
-        
+
     except TypeError as e:
         raise ValueError(f"Stopping criteria signature invalid: {e}")
 
 
 class UserSimulatorResponse(BaseModel):
     """Response from the user simulator."""
+
     content: str = Field(description="The simulated user response")
-    should_continue: bool = Field(default=True, description="Whether the conversation should continue")
+    should_continue: bool = Field(
+        default=True, description="Whether the conversation should continue"
+    )
 
 
 class UserSimulator:
     """
     Simulates realistic user interactions for conversational AI evaluation.
-    
+
     This class can generate user responses based on personas, behaviors, and
     conversation context to create realistic multi-turn evaluations.
     """
-    
+
     def __init__(
         self,
         prompt: Prompt,
@@ -128,11 +142,11 @@ def __init__(
         agent_function: t.Callable,
         stopping_criteria: t.Optional[t.Callable[[ConversationHistory], bool]] = None,
         max_turns: int = 10,
-        **kwargs
+        **kwargs,
     ):
         """
         Initialize the UserSimulator.
-        
+
         Args:
             prompt: The prompt template for generating user responses
             llm: The language model to use for generating responses
@@ -146,44 +160,48 @@ def __init__(
         if "conversation_history" not in placeholders:
             # Add conversation_history to the prompt instruction
             prompt.instruction += "\n\nConversation History:\n{conversation_history}"
-        
+
         self.prompt = prompt
         self.llm = llm
         self.agent_function = agent_function
         self.stopping_criteria = stopping_criteria or self._default_stopping_criteria
         self.max_turns = max_turns
         self.kwargs = kwargs
-        
+
         # Validate agent function and stopping criteria
         validate_agent_function(self.agent_function)
         validate_stopping_criteria(self.stopping_criteria)
-    
-    def _default_stopping_criteria(self, conversation_history: ConversationHistory) -> bool:
+
+    def _default_stopping_criteria(
+        self, conversation_history: ConversationHistory
+    ) -> bool:
         """Default stopping criteria based on conversation length."""
         return len(conversation_history.messages) >= self.max_turns
-    
-    def _should_stop_conversation(self, conversation_history: ConversationHistory) -> bool:
+
+    def _should_stop_conversation(
+        self, conversation_history: ConversationHistory
+    ) -> bool:
         """Check if the conversation should be stopped."""
         try:
             result = self.stopping_criteria(conversation_history)
             return bool(result)
         except Exception as e:
             # If stopping criteria fails, stop conversation to avoid infinite loop
-            print(f"Warning: Stopping criteria failed with error: {e}. Stopping conversation.")
+            print(
+                f"Warning: Stopping criteria failed with error: {e}. Stopping conversation."
+            )
             return True
-    
+
     def _generate_user_response(
-        self, 
-        conversation_history: ConversationHistory,
-        **context_vars
+        self, conversation_history: ConversationHistory, **context_vars
     ) -> UserSimulatorResponse:
         """
         Generate a user response based on conversation history and context.
-        
+
         Args:
             conversation_history: The current conversation history
             **context_vars: Additional context variables for prompt formatting
-            
+
         Returns:
             UserSimulatorResponse containing the generated response
         """
@@ -191,22 +209,26 @@ def _generate_user_response(
         prompt_vars = {
             **context_vars,
             **self.kwargs,
-            "conversation_history": self._format_conversation_for_prompt(conversation_history)
+            "conversation_history": self._format_conversation_for_prompt(
+                conversation_history
+            ),
         }
-        
+
         # Generate the prompt
         formatted_prompt = self.prompt.format(**prompt_vars)
-        
+
         # Generate response using LLM
         response = self.llm.generate(formatted_prompt, UserSimulatorResponse)
-        
+
         return response
-    
-    def _format_conversation_for_prompt(self, conversation_history: ConversationHistory) -> str:
+
+    def _format_conversation_for_prompt(
+        self, conversation_history: ConversationHistory
+    ) -> str:
         """Format conversation history for inclusion in prompts."""
         if not conversation_history.messages:
             return "No previous conversation."
-        
+
         formatted_messages = []
         for msg in conversation_history.messages:
             # Handle different content types
@@ -216,26 +238,24 @@ def _format_conversation_for_prompt(self, conversation_history: ConversationHist
                 # Convert dict/list content to string representation
                 content_str = str(msg.content)
             formatted_messages.append(f"{msg.role.title()}: {content_str}")
-        
+
         return "\n".join(formatted_messages)
-    
+
     def run(
-        self,
-        initial_message: t.Optional[t.Dict[str, str]] = None,
-        **context_vars
+        self, initial_message: t.Optional[t.Dict[str, str]] = None, **context_vars
     ) -> ConversationHistory:
         """
         Run a complete conversation simulation.
-        
+
         Args:
             initial_message: Optional initial message to start the conversation
             **context_vars: Additional context variables for the simulation
-            
+
         Returns:
             ConversationHistory containing the complete conversation
         """
         conversation_history = ConversationHistory()
-        
+
         # Add initial message if provided
         if initial_message:
             role = initial_message.get("role", "user")
@@ -243,61 +263,64 @@ def run(
             # Ensure role is valid
             if role not in ["user", "assistant"]:
                 role = "user"
-            conversation_history.add_message(t.cast(t.Literal["user", "assistant"], role), content)
-        
+            conversation_history.add_message(
+                t.cast(t.Literal["user", "assistant"], role), content
+            )
+
         # Continue conversation until stopping criteria is met
         while not self._should_stop_conversation(conversation_history):
             last_message = conversation_history.get_last_message()
-            
+
             # If last message was from user, get agent response
             if last_message and last_message.role == "user":
                 try:
                     # Call the agent function with the conversation history
                     agent_response = self.agent_function(
-                        last_message.content,
-                        conversation_history
+                        last_message.content, conversation_history
                     )
-                    
+
                     # Add agent response to conversation
                     if isinstance(agent_response, str):
                         conversation_history.add_message("assistant", agent_response)
-                    elif isinstance(agent_response, dict) and "content" in agent_response:
+                    elif (
+                        isinstance(agent_response, dict) and "content" in agent_response
+                    ):
                         role = agent_response.get("role", "assistant")
                         if role not in ["user", "assistant"]:
                             role = "assistant"
-                        conversation_history.add_message(role, agent_response["content"])
+                        conversation_history.add_message(
+                            role, agent_response["content"]
+                        )
                     else:
                         # Handle other response formats
-                        conversation_history.add_message("assistant", str(agent_response))
-                
+                        conversation_history.add_message(
+                            "assistant", str(agent_response)
+                        )
+
                 except Exception as e:
                     # Handle agent function errors gracefully
-                    conversation_history.add_message(
-                        "assistant", 
-                        f"Error: {str(e)}"
-                    )
-            
+                    conversation_history.add_message("assistant", f"Error: {str(e)}")
+
             # If conversation should continue, generate user response
             if not self._should_stop_conversation(conversation_history):
                 user_response = self._generate_user_response(
-                    conversation_history, 
-                    **context_vars
+                    conversation_history, **context_vars
                 )
-                
+
                 # Add user response to conversation
                 conversation_history.add_message("user", user_response.content)
-                
+
                 # Check if user wants to stop
                 if not user_response.should_continue:
                     break
-        
+
         return conversation_history
 
 
 def default_stopping_criteria(conversation_history: ConversationHistory) -> bool:
     """
     Default stopping criteria function.
-    
+
     Stops conversation when it reaches 10 messages or more.
     """
-    return len(conversation_history.messages) >= 10
\ No newline at end of file
+    return len(conversation_history.messages) >= 10
diff --git a/experimental/ragas_experimental/tracing/langfuse.py b/experimental/ragas_experimental/tracing/langfuse.py
index 334e7be3e..66884ad40 100644
--- a/experimental/ragas_experimental/tracing/langfuse.py
+++ b/experimental/ragas_experimental/tracing/langfuse.py
@@ -1,28 +1,22 @@
-"""Utils to help with interact with langfuse traces"""
+"""Utils to help to interact with langfuse traces"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/tracing/langfuse.ipynb.
+__all__ = ["observe", "logger", "LangfuseTrace", "sync_trace", "add_query_param"]
 
-# %% auto 0
-__all__ = ['observe', 'logger', 'LangfuseTrace', 'sync_trace', 'add_query_param']
-
-# %% ../../nbs/api/tracing/langfuse.ipynb 2
-import typing as t
 import asyncio
 import logging
-from urllib.parse import urlparse, parse_qsl, urlencode, urlunparse
+import typing as t
+from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
 
 from langfuse.api import Observation, TraceWithFullDetails
 from langfuse.decorators import langfuse_context, observe
 from langfuse.utils.langfuse_singleton import LangfuseSingleton
 
-# %% ../../nbs/api/tracing/langfuse.ipynb 3
 # just adding it to the namespace
 observe = observe
 
-# %% ../../nbs/api/tracing/langfuse.ipynb 4
 logger = logging.getLogger(__name__)
 
-# %% ../../nbs/api/tracing/langfuse.ipynb 5
+
 class LangfuseTrace:
     def __init__(self, trace: TraceWithFullDetails):
         self.trace = trace
@@ -35,7 +29,7 @@ def filter(self, span_name: str) -> t.List[Observation]:
         trace = self._langfuse_client.fetch_trace(self.trace.id)
         return [span for span in trace.data.observations if span.name == span_name]
 
-# %% ../../nbs/api/tracing/langfuse.ipynb 6
+
 async def sync_trace(
     trace_id: t.Optional[str] = None, max_retries: int = 10, delay: float = 2
 ) -> LangfuseTrace:
@@ -72,7 +66,7 @@ async def sync_trace(
 
     raise ValueError(f"Trace {trace_id} not found after {max_retries} attempts")
 
-# %% ../../nbs/api/tracing/langfuse.ipynb 7
+
 def add_query_param(url, param_name, param_value):
     """Add a query parameter to a URL."""
     # Parse the URL
diff --git a/experimental/ragas_experimental/tracing/mlflow.py b/experimental/ragas_experimental/tracing/mlflow.py
index b3484d223..9d73cc0b3 100644
--- a/experimental/ragas_experimental/tracing/mlflow.py
+++ b/experimental/ragas_experimental/tracing/mlflow.py
@@ -1,16 +1,13 @@
 """tracing using mlflow"""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/api/tracing/mlflow.ipynb.
+__all__ = ["MLflowTrace", "sync_trace"]
 
-# %% auto 0
-__all__ = ['MLflowTrace', 'sync_trace']
-
-# %% ../../nbs/api/tracing/mlflow.ipynb 2
 import os
 import typing as t
-from mlflow.entities.trace import Trace
-from mlflow.entities.span import Span
+
 from mlflow import get_last_active_trace
+from mlflow.entities.span import Span
+from mlflow.entities.trace import Trace
 
 
 class MLflowTrace:
@@ -41,7 +38,7 @@ def get_filter(self, span_name) -> t.List[Span]:
 
         return self.trace.search_spans(name=span_name)
 
-# %% ../../nbs/api/tracing/mlflow.ipynb 3
+
 async def sync_trace():
 
     trace = get_last_active_trace()
diff --git a/experimental/ragas_experimental/typing.py b/experimental/ragas_experimental/typing.py
index 9e1b42deb..c7e2339a2 100644
--- a/experimental/ragas_experimental/typing.py
+++ b/experimental/ragas_experimental/typing.py
@@ -1,30 +1,37 @@
 """Field Metadata for python's `t.Annotate`."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/api/typing.ipynb.
+__all__ = [
+    "SUPPORTED_BACKENDS",
+    "DEFAULT_COLUMN_SETTINGS",
+    "COLOR_MAP",
+    "ColumnType",
+    "FieldMeta",
+    "Number",
+    "Text",
+    "Url",
+    "get_colors_for_options",
+    "Select",
+    "MultiSelect",
+    "Checkbox",
+    "Date",
+    "Custom",
+    "ModelConverter",
+    "infer_metric_result_type",
+    "infer_field_type",
+]
 
-# %% auto 0
-__all__ = ['SUPPORTED_BACKENDS', 'DEFAULT_COLUMN_SETTINGS', 'COLOR_MAP', 'ColumnType', 'FieldMeta', 'Number', 'Text', 'Url',
-           'get_colors_for_options', 'Select', 'MultiSelect', 'Checkbox', 'Date', 'Custom', 'ModelConverter',
-           'infer_metric_result_type', 'infer_field_type']
-
-# %% ../nbs/api/typing.ipynb 2
 import typing as t
+from datetime import date, datetime
 from enum import Enum
-import inspect
-from datetime import datetime, date
 
-from pydantic import BaseModel, create_model
 from fastcore.utils import patch
 
 from .metric.result import MetricResult
 
-# %% ../nbs/api/typing.ipynb 4
-import typing as t
-
 # Define supported backends
-SUPPORTED_BACKENDS = t.Literal["ragas_app", "local"]
+SUPPORTED_BACKENDS = t.Literal["ragas/app", "local/csv"]
+
 
-# %% ../nbs/api/typing.ipynb 6
 class ColumnType(str, Enum):
     """Column types supported by the Ragas API."""
 
@@ -37,7 +44,7 @@ class ColumnType(str, Enum):
     URL = "url"
     CUSTOM = "custom"
 
-# %% ../nbs/api/typing.ipynb 7
+
 DEFAULT_COLUMN_SETTINGS = {
     "width": 255,
     "isVisible": True,
@@ -63,7 +70,7 @@ def __init__(
         if settings:
             self.settings.update(settings)
 
-# %% ../nbs/api/typing.ipynb 8
+
 class Number(FieldMeta):
     """Number field metadata."""
 
@@ -83,7 +90,7 @@ def __init__(
                 settings["range"]["max"] = max_value
         super().__init__(ColumnType.NUMBER, required, id, settings=settings)
 
-# %% ../nbs/api/typing.ipynb 9
+
 class Text(FieldMeta):
     """Text field metadata."""
 
@@ -95,7 +102,7 @@ def __init__(
             settings["max_length"] = max_length
         super().__init__(ColumnType.TEXT, required, id, settings=settings)
 
-# %% ../nbs/api/typing.ipynb 10
+
 class Url(FieldMeta):
     """Url field metadata."""
 
@@ -103,7 +110,7 @@ def __init__(self, required: bool = True, id: t.Optional[str] = None):
         settings = {}
         super().__init__(ColumnType.URL, required, id, settings=settings)
 
-# %% ../nbs/api/typing.ipynb 11
+
 # dict of possible colors for select fields
 COLOR_MAP = {
     "red": "hsl(0, 85%, 60%)",
@@ -158,7 +165,7 @@ def get_colors_for_options(options, color_names=None):
         for i, option in enumerate(options)
     ]
 
-# %% ../nbs/api/typing.ipynb 12
+
 class Select(FieldMeta):
     """Select field metadata."""
 
@@ -180,7 +187,7 @@ def __init__(
                 settings["options"] = get_colors_for_options(options)
         super().__init__(ColumnType.SELECT, required, settings=settings)
 
-# %% ../nbs/api/typing.ipynb 13
+
 class MultiSelect(FieldMeta):
     """MultiSelect field metadata."""
 
@@ -190,14 +197,14 @@ def __init__(self, options: t.Optional[t.List[str]] = None, required: bool = Tru
             settings["options"] = [{"name": option} for option in options]
         super().__init__(ColumnType.MULTI_SELECT, required, settings=settings)
 
-# %% ../nbs/api/typing.ipynb 14
+
 class Checkbox(FieldMeta):
     """Checkbox field metadata."""
 
     def __init__(self, required: bool = True):
         super().__init__(ColumnType.CHECKBOX, required)
 
-# %% ../nbs/api/typing.ipynb 15
+
 class Date(FieldMeta):
     """Date field metadata."""
 
@@ -207,7 +214,7 @@ def __init__(self, include_time: bool = False, required: bool = True):
             settings["include_time"] = include_time
         super().__init__(ColumnType.DATE, required, settings=settings)
 
-# %% ../nbs/api/typing.ipynb 16
+
 class Custom(FieldMeta):
     """Custom field metadata."""
 
@@ -217,11 +224,11 @@ def __init__(self, custom_type: str = "", required: bool = True):
             settings["type"] = custom_type
         super().__init__(ColumnType.CUSTOM, required, settings=settings)
 
-# %% ../nbs/api/typing.ipynb 18
+
 class ModelConverter:
     """Convert Pydantic models to Ragas API columns and rows."""
 
-# %% ../nbs/api/typing.ipynb 19
+
 def infer_metric_result_type(field_value):
     """Infer field type from a MetricResult instance."""
     if field_value is None:
@@ -241,7 +248,7 @@ def infer_metric_result_type(field_value):
         # Default to Text for string or other types
         return Text()
 
-# %% ../nbs/api/typing.ipynb 20
+
 def infer_field_type(annotation, field_info):
     """Infer field type from Python type annotation."""
     # Check for Annotated with our custom metadata
@@ -330,7 +337,7 @@ def infer_field_type(annotation, field_info):
     # Default to Text for complex or unknown types
     return Text()
 
-# %% ../nbs/api/typing.ipynb 21
+
 @patch(cls_method=True)
 def model_to_columns(cls: ModelConverter, model_class):
     """Convert a Pydantic model class to Ragas API column definitions."""
@@ -389,7 +396,7 @@ def model_to_columns(cls: ModelConverter, model_class):
         columns[i]["settings"]["position"] = i
     return columns
 
-# %% ../nbs/api/typing.ipynb 24
+
 @patch(cls_method=True)
 def instance_to_row(cls: ModelConverter, instance, model_class=None):
     """Convert a Pydantic model instance to a Ragas API row."""
@@ -440,7 +447,7 @@ def instance_to_row(cls: ModelConverter, instance, model_class=None):
 
     return {"data": row_cells}
 
-# %% ../nbs/api/typing.ipynb 25
+
 @patch(cls_method=True)
 def instances_to_rows(cls: ModelConverter, instances, model_class=None):
     """Convert multiple Pydantic model instances to Ragas API rows."""
diff --git a/experimental/ragas_experimental/utils.py b/experimental/ragas_experimental/utils.py
index ea2df2e9f..c3cfb83da 100644
--- a/experimental/ragas_experimental/utils.py
+++ b/experimental/ragas_experimental/utils.py
@@ -1,15 +1,23 @@
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/api/utils.ipynb.
+__all__ = [
+    "create_nano_id",
+    "async_to_sync",
+    "plot_experiments_as_subplots",
+    "get_test_directory",
+]
 
-# %% auto 0
-__all__ = ['create_nano_id', 'async_to_sync', 'plot_experiments_as_subplots', 'get_test_directory']
-
-# %% ../nbs/api/utils.ipynb 2
+import asyncio
+import functools
+import os
 import string
+import tempfile
 import uuid
-import functools
-import asyncio
+from collections import Counter
+
+import numpy as np
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+
 
-# %% ../nbs/api/utils.ipynb 3
 def create_nano_id(size=12):
     # Define characters to use (alphanumeric)
     alphabet = string.ascii_letters + string.digits
@@ -26,7 +34,7 @@ def create_nano_id(size=12):
     # Pad if necessary and return desired length
     return result[:size]
 
-# %% ../nbs/api/utils.ipynb 4
+
 def async_to_sync(async_func):
     """Convert an async function to a sync function"""
 
@@ -47,12 +55,6 @@ def sync_wrapper(*args, **kwargs):
 
     return sync_wrapper
 
-# %% ../nbs/api/utils.ipynb 5
-import numpy as np
-import plotly.graph_objects as go
-from plotly.subplots import make_subplots
-from collections import Counter
-
 
 def plot_experiments_as_subplots(data, experiment_names=None):
     """
@@ -224,11 +226,7 @@ def plot_experiments_as_subplots(data, experiment_names=None):
 
     return fig
 
-# %% ../nbs/api/utils.ipynb 7
-import tempfile
-import os
 
-# %% ../nbs/api/utils.ipynb 8
 # Helper function for tests
 def get_test_directory():
     """Create a test directory that will be cleaned up on process exit.
diff --git a/experimental/settings.ini b/experimental/settings.ini
deleted file mode 100644
index b8f50e6b5..000000000
--- a/experimental/settings.ini
+++ /dev/null
@@ -1,46 +0,0 @@
-[DEFAULT]
-# All sections below are required unless otherwise specified.
-# See https://github.com/AnswerDotAI/nbdev/blob/main/settings.ini for examples.
-
-### Python library ###
-repo = ragas_experimental
-lib_name = %(repo)s
-# Version is managed by setuptools_scm from Git tags - do not specify here
-min_python = 3.9
-license = apache2
-black_formatting = True
-
-### nbdev ###
-doc_path = ../docs/experimental
-lib_path = %(repo)s
-nbs_path = nbs
-recursive = True
-tst_flags = notest
-# Explicitly disable nbdev's version management - we use setuptools_scm instead
-put_version_in_init = False
-
-### Docs ###
-branch = main
-custom_sidebar = True
-doc_host = https://%(user)s.github.io
-doc_baseurl = /%(repo)s
-git_url = https://github.com/%(user)s/%(repo)s
-title = %(lib_name)s
-
-### PyPI ###
-audience = Developers
-author = jjmachan
-author_email = jamesjithin97@gmail.com
-copyright = 2025 onwards, %(author)s
-description = Experimental Ragas Evaluation UI and Library
-keywords = nbdev jupyter notebook python
-language = English
-status = 3
-user = explodinggradients
-
-### Dependencies ###
-requirements = fastcore tqdm langfuse instructor pydantic numpy plotly mlflow gitpython
-dev_requirements = pytest black
-# console_scripts =
-# conda_user = 
-# package_data =
diff --git a/experimental/setup.py b/experimental/setup.py
deleted file mode 100644
index 6ccf5bc95..000000000
--- a/experimental/setup.py
+++ /dev/null
@@ -1,78 +0,0 @@
-from pkg_resources import parse_version
-from configparser import ConfigParser
-import setuptools, shlex
-import os
-import pathlib
-assert parse_version(setuptools.__version__)>=parse_version('36.2')
-
-# note: all settings are in settings.ini; edit there, not here
-config = ConfigParser(delimiters=['='])
-config.read('settings.ini', encoding='utf-8')
-cfg = config['DEFAULT']
-
-# Configure setuptools_scm - this should match pyproject.toml configuration
-use_scm_version = {
-    "root": "..",                   # Path to monorepo root
-    "relative_to": __file__,        # Resolve paths relative to this file
-    "fallback_version": "0.0.0",    # Fallback if Git data is not available
-}
-
-# Modify expected keys to handle setuptools_scm version management
-cfg_keys = 'description keywords author author_email'.split()
-expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
-for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
-
-# Add version to cfg so the setup still works even though it's not in settings.ini
-cfg['version'] = '0.0.0'  # This will be overridden by setuptools_scm
-setup_cfg = {o:cfg[o] for o in cfg_keys}
-
-licenses = {
-    'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'),
-    'mit': ('MIT License', 'OSI Approved :: MIT License'),
-    'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'),
-    'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'),
-    'bsd3': ('BSD License', 'OSI Approved :: BSD License'),
-}
-statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
-    '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
-py_versions = '3.6 3.7 3.8 3.9 3.10 3.11 3.12'.split()
-
-requirements = shlex.split(cfg.get('requirements', ''))
-if cfg.get('pip_requirements'): requirements += shlex.split(cfg.get('pip_requirements', ''))
-min_python = cfg['min_python']
-lic = licenses.get(cfg['license'].lower(), (cfg['license'], None))
-dev_requirements = (cfg.get('dev_requirements') or '').split()
-
-package_data = dict()
-pkg_data = cfg.get('package_data', None)
-if pkg_data:
-    package_data[cfg['lib_name']] =  pkg_data.split() # split as multiple files might be listed
-# Add package data to setup_cfg for setuptools.setup(..., **setup_cfg)
-setup_cfg['package_data'] = package_data
-
-setuptools.setup(
-    name = cfg['lib_name'],
-    license = lic[0],
-    use_scm_version = use_scm_version,  # Use Git tags for versioning
-    classifiers = [
-        'Development Status :: ' + statuses[int(cfg['status'])],
-        'Intended Audience :: ' + cfg['audience'].title(),
-        'Natural Language :: ' + cfg['language'].title(),
-    ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []),
-    url = cfg['git_url'],
-    packages = setuptools.find_packages(),
-    include_package_data = True,
-    install_requires = requirements,
-    extras_require={ 'dev': dev_requirements },
-    dependency_links = cfg.get('dep_links','').split(),
-    python_requires  = '>=' + cfg['min_python'],
-    long_description = open('README.md', encoding='utf-8').read(),
-    long_description_content_type = 'text/markdown',
-    zip_safe = False,
-    entry_points = {
-        'console_scripts': cfg.get('console_scripts','').split(),
-        'nbdev': [f'{cfg.get("lib_path")}={cfg.get("lib_path")}._modidx:d']
-    },
-    **{k: v for k, v in setup_cfg.items() if k != 'version'})
-
-
diff --git a/experimental/old_nbs/.notest b/experimental/tests/__init__.py
similarity index 100%
rename from experimental/old_nbs/.notest
rename to experimental/tests/__init__.py
diff --git a/experimental/tests/conftest.py b/experimental/tests/conftest.py
new file mode 100644
index 000000000..5bf5ec727
--- /dev/null
+++ b/experimental/tests/conftest.py
@@ -0,0 +1,81 @@
+from __future__ import annotations
+
+import typing as t
+
+import numpy as np
+import pytest
+from pydantic import BaseModel
+
+from ragas_experimental.embedding.base import BaseEmbedding
+
+
+def pytest_configure(config):
+    """
+    configure pytest for experimental tests
+    """
+    # Extra Pytest Markers
+    # add `experimental_ci`
+    config.addinivalue_line(
+        "markers",
+        "experimental_ci: Set of tests that will be run as part of Experimental CI",
+    )
+    # add `e2e`
+    config.addinivalue_line(
+        "markers",
+        "e2e: End-to-End tests for Experimental",
+    )
+
+
+class MockLLM:
+    """Mock LLM for testing purposes"""
+    
+    def __init__(self):
+        self.provider = "mock"
+        self.model = "mock-model"
+        self.is_async = True
+    
+    def generate(self, prompt: str, response_model: t.Type[BaseModel]) -> BaseModel:
+        # Return a mock instance of the response model
+        return response_model()
+    
+    async def agenerate(self, prompt: str, response_model: t.Type[BaseModel]) -> BaseModel:
+        # Return a mock instance of the response model
+        return response_model()
+
+
+class MockEmbedding(BaseEmbedding):
+    """Mock Embedding for testing purposes"""
+
+    def embed_text(self, text: str, **kwargs: t.Any) -> t.List[float]:
+        np.random.seed(42)  # Set seed for deterministic tests
+        return np.random.rand(768).tolist()
+
+    async def aembed_text(self, text: str, **kwargs: t.Any) -> t.List[float]:
+        np.random.seed(42)  # Set seed for deterministic tests
+        return np.random.rand(768).tolist()
+
+    def embed_document(
+        self, 
+        text: str, 
+        metadata: t.Dict[str, t.Any] = None, 
+        **kwargs: t.Any
+    ) -> t.List[float]:
+        return self.embed_text(text, **kwargs)
+
+    async def aembed_document(
+        self, 
+        text: str, 
+        metadata: t.Dict[str, t.Any] = None, 
+        **kwargs: t.Any
+    ) -> t.List[float]:
+        return await self.aembed_text(text, **kwargs)
+
+
+@pytest.fixture
+def mock_llm():
+    return MockLLM()
+
+
+@pytest.fixture
+def mock_embedding():
+    return MockEmbedding()
\ No newline at end of file
diff --git a/experimental/old_nbs/api/backends/.notest b/experimental/tests/e2e/__init__.py
similarity index 100%
rename from experimental/old_nbs/api/backends/.notest
rename to experimental/tests/e2e/__init__.py
diff --git a/experimental/tests/e2e/test_integration.py b/experimental/tests/e2e/test_integration.py
new file mode 100644
index 000000000..923590974
--- /dev/null
+++ b/experimental/tests/e2e/test_integration.py
@@ -0,0 +1,255 @@
+import tempfile
+import typing as t
+import pytest
+from unittest.mock import Mock
+from dataclasses import dataclass, field
+from ragas_experimental.project.core import Project
+from ragas_experimental.model.pydantic_model import ExtendedPydanticBaseModel as BaseModel
+from ragas_experimental.metric import MetricResult
+from ragas_experimental.metric.base import Metric
+
+
+class EvaluationData(BaseModel):
+    """Model for evaluation data."""
+    question: str
+    context: str
+    answer: str
+    ground_truth: str
+
+
+class EvaluationResult(BaseModel):
+    """Model for evaluation results."""
+    result: float
+    reason: str
+
+
+@dataclass
+class IntegrationMetric(Metric):
+    """Simple metric for integration testing."""
+    
+    def __post_init__(self):
+        super().__post_init__()
+        self._response_model = EvaluationResult
+
+
+@pytest.fixture
+def temp_project():
+    """Create a temporary project for integration testing."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        project = Project.create(
+            name="integration_test_project",
+            description="Project for integration testing",
+            backend="local/csv",
+            root_dir=temp_dir
+        )
+        yield project
+
+
+@pytest.fixture
+def mock_llm():
+    """Create a mock LLM for testing."""
+    llm = Mock()
+    
+    def mock_generate(prompt, response_model):
+        return response_model(result=0.8, reason="Mock evaluation")
+    
+    llm.generate = mock_generate
+    return llm
+
+
+def test_full_evaluation_workflow(temp_project, mock_llm):
+    """Test a complete evaluation workflow with project, dataset, and metrics."""
+    
+    # 1. Create a dataset
+    dataset = temp_project.create_dataset(
+        name="evaluation_dataset",
+        model=EvaluationData
+    )
+    
+    # 2. Add evaluation data
+    eval_data = [
+        EvaluationData(
+            question="What is the capital of France?",
+            context="France is a country in Europe. Its capital is Paris.",
+            answer="Paris",
+            ground_truth="Paris"
+        ),
+        EvaluationData(
+            question="What is 2+2?",
+            context="Basic arithmetic operations.",
+            answer="4",
+            ground_truth="4"
+        )
+    ]
+    
+    for data in eval_data:
+        dataset.append(data)
+    
+    # 3. Create a metric
+    metric = IntegrationMetric(
+        name="integration_metric",
+        prompt="Evaluate if the answer '{answer}' is correct given the question '{question}' and context '{context}'. Ground truth: '{ground_truth}'"
+    )
+    
+    # 4. Run evaluation on dataset
+    results = []
+    for entry in dataset:
+        result = metric.score(
+            llm=mock_llm,
+            question=entry.question,
+            context=entry.context,
+            answer=entry.answer,
+            ground_truth=entry.ground_truth
+        )
+        results.append(result)
+    
+    # 5. Verify results
+    assert len(results) == 2
+    assert all(isinstance(result, MetricResult) for result in results)
+    assert all(result.result == 0.8 for result in results)  # Mock always returns 0.8
+
+
+def test_project_dataset_persistence(temp_project):
+    """Test that data persists across dataset operations."""
+    
+    # Create dataset and add data
+    dataset = temp_project.create_dataset(
+        name="persistence_test",
+        model=EvaluationData
+    )
+    
+    test_data = EvaluationData(
+        question="Test question",
+        context="Test context", 
+        answer="Test answer",
+        ground_truth="Test ground truth"
+    )
+    
+    dataset.append(test_data)
+    assert len(dataset) == 1
+    
+    # Load dataset again (simulates persistence)
+    dataset.load()
+    assert len(dataset) == 1
+    
+    # Verify data integrity
+    loaded_data = dataset[0]
+    assert loaded_data.question == "Test question"
+    assert loaded_data.context == "Test context"
+    assert loaded_data.answer == "Test answer"
+    assert loaded_data.ground_truth == "Test ground truth"
+
+
+def test_batch_evaluation_workflow(temp_project, mock_llm):
+    """Test batch evaluation across multiple entries."""
+    
+    # Create dataset with multiple entries
+    dataset = temp_project.create_dataset(
+        name="batch_evaluation",
+        model=EvaluationData
+    )
+    
+    # Add multiple evaluation entries
+    for i in range(5):
+        dataset.append(EvaluationData(
+            question=f"Question {i}",
+            context=f"Context {i}",
+            answer=f"Answer {i}",
+            ground_truth=f"Ground truth {i}"
+        ))
+    
+    # Create metric
+    metric = IntegrationMetric(
+        name="batch_metric",
+        prompt="Evaluate: {question} with context: {context} -> {answer} vs ground_truth: {ground_truth}"
+    )
+    
+    # Run individual evaluations (since batch_score doesn't exist in the real API)
+    batch_results = []
+    for entry in dataset:
+        result = metric.score(
+            llm=mock_llm,
+            question=entry.question,
+            context=entry.context,
+            answer=entry.answer,
+            ground_truth=entry.ground_truth
+        )
+        batch_results.append(result)
+    
+    # Verify batch results
+    assert len(batch_results) == 5
+    assert all(isinstance(result, MetricResult) for result in batch_results)
+
+
+def test_dataset_modification_workflow(temp_project):
+    """Test modifying dataset entries and persistence."""
+    
+    dataset = temp_project.create_dataset(
+        name="modification_test",
+        model=EvaluationData
+    )
+    
+    # Add initial data
+    initial_data = EvaluationData(
+        question="Initial question",
+        context="Initial context",
+        answer="Initial answer", 
+        ground_truth="Initial ground truth"
+    )
+    dataset.append(initial_data)
+    
+    # Modify the entry
+    entry = dataset[0]
+    entry.answer = "Modified answer"
+    dataset.save(entry)
+    
+    # Verify modification persisted
+    assert dataset[0].answer == "Modified answer"
+    
+    # Load and verify persistence
+    dataset.load()
+    assert dataset[0].answer == "Modified answer"
+    assert dataset[0].question == "Initial question"  # Other fields unchanged
+
+
+def test_metric_variable_extraction_integration(mock_llm):
+    """Test that metrics can extract variables from complex prompts."""
+    
+    metric = IntegrationMetric(
+        name="variable_test",
+        prompt="Given the question: '{question}', context: '{context}', and answer: '{answer}', evaluate against ground truth: '{ground_truth}'. Consider the difficulty: '{difficulty}' and domain: '{domain}'."
+    )
+    
+    variables = metric.get_variables()
+    expected_vars = {"question", "context", "answer", "ground_truth", "difficulty", "domain"}
+    
+    assert set(variables) == expected_vars
+
+
+@pytest.mark.asyncio
+async def test_async_evaluation_integration(temp_project):
+    """Test async evaluation workflow."""
+    
+    # Mock async LLM
+    async_llm = Mock()
+    
+    async def mock_agenerate(prompt, response_model):
+        return response_model(result=0.9, reason="Async mock evaluation")
+    
+    async_llm.agenerate = mock_agenerate
+    
+    # Create metric
+    metric = IntegrationMetric(
+        name="async_metric",
+        prompt="Async evaluate: {question} -> {answer}"
+    )
+    
+    # Test async scoring
+    result = await metric.ascore(
+        llm=async_llm,
+        question="Test question",
+        answer="Test answer"
+    )
+    
+    assert isinstance(result, MetricResult)
+    assert result.result == 0.9
\ No newline at end of file
diff --git a/experimental/old_nbs/api/project/.notest b/experimental/tests/unit/__init__.py
similarity index 100%
rename from experimental/old_nbs/api/project/.notest
rename to experimental/tests/unit/__init__.py
diff --git a/experimental/tests/unit/test_dataset.py b/experimental/tests/unit/test_dataset.py
new file mode 100644
index 000000000..c8d47d35b
--- /dev/null
+++ b/experimental/tests/unit/test_dataset.py
@@ -0,0 +1,270 @@
+import tempfile
+import typing as t
+import pytest
+
+from ragas_experimental.dataset import Dataset
+from ragas_experimental.project.core import Project
+from ragas_experimental.model.pydantic_model import ExtendedPydanticBaseModel as BaseModel
+from ragas_experimental.metric import MetricResult
+
+
+class DatasetModel(BaseModel):
+    id: int
+    name: str
+    description: str
+
+
+class ExperimentModel(DatasetModel):
+    tags: t.Literal["tag1", "tag2", "tag3"]
+    result: MetricResult
+
+
+@pytest.fixture
+def temp_dir():
+    """Create a temporary directory for tests."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        yield temp_dir
+
+
+@pytest.fixture
+def test_project(temp_dir):
+    """Create a test project."""
+    return Project.create(name="test_project", backend="local/csv", root_dir=temp_dir)
+
+
+@pytest.fixture
+def dataset_instance():
+    """Create a test dataset instance."""
+    return DatasetModel(
+        id=0,
+        name="test",
+        description="test description",
+    )
+
+
+@pytest.fixture
+def experiment_instance(dataset_instance):
+    """Create a test experiment instance."""
+    return ExperimentModel(
+        **dataset_instance.model_dump(),
+        tags="tag1",
+        result=MetricResult(result=0.5, reason="test reason"),
+    )
+
+
+def test_model_creation(dataset_instance, experiment_instance):
+    """Test that models can be created successfully."""
+    assert dataset_instance.id == 0
+    assert dataset_instance.name == "test"
+    assert dataset_instance.description == "test description"
+    
+    assert experiment_instance.id == 0
+    assert experiment_instance.tags == "tag1"
+    assert experiment_instance.result.result == 0.5
+
+
+def test_dataset_creation(test_project):
+    """Test creating datasets with different models."""
+    dataset_with_dataset_model = test_project.create_dataset(
+        name="dataset_with_dataset_model", 
+        model=DatasetModel
+    )
+    dataset_with_experiment_model = test_project.create_dataset(
+        name="dataset_with_experiment_model", 
+        model=ExperimentModel
+    )
+    
+    assert len(dataset_with_dataset_model) == 0
+    assert len(dataset_with_experiment_model) == 0
+
+
+def test_dataset_append_and_length(test_project, dataset_instance, experiment_instance):
+    """Test appending entries to datasets and checking length."""
+    dataset_with_dataset_model = test_project.create_dataset(
+        name="dataset_with_dataset_model", 
+        model=DatasetModel
+    )
+    dataset_with_experiment_model = test_project.create_dataset(
+        name="dataset_with_experiment_model", 
+        model=ExperimentModel
+    )
+    
+    dataset_with_dataset_model.append(dataset_instance)
+    dataset_with_experiment_model.append(experiment_instance)
+    
+    assert len(dataset_with_dataset_model) == 1
+    assert len(dataset_with_experiment_model) == 1
+
+
+def test_dataset_pop(test_project, dataset_instance, experiment_instance):
+    """Test removing entries from datasets."""
+    dataset_with_dataset_model = test_project.create_dataset(
+        name="dataset_with_dataset_model", 
+        model=DatasetModel
+    )
+    dataset_with_experiment_model = test_project.create_dataset(
+        name="dataset_with_experiment_model", 
+        model=ExperimentModel
+    )
+    
+    dataset_with_dataset_model.append(dataset_instance)
+    dataset_with_experiment_model.append(experiment_instance)
+    
+    dataset_with_dataset_model.pop()
+    dataset_with_experiment_model.pop()
+    
+    assert len(dataset_with_dataset_model) == 0
+    assert len(dataset_with_experiment_model) == 0
+
+
+def test_dataset_multiple_entries(test_project, dataset_instance, experiment_instance):
+    """Test adding multiple entries to datasets."""
+    dataset_with_dataset_model = test_project.create_dataset(
+        name="dataset_with_dataset_model", 
+        model=DatasetModel
+    )
+    dataset_with_experiment_model = test_project.create_dataset(
+        name="dataset_with_experiment_model", 
+        model=ExperimentModel
+    )
+    
+    # Add 10 entries
+    for i in range(10):
+        dataset_with_dataset_model.append(dataset_instance)
+        dataset_with_experiment_model.append(experiment_instance)
+    
+    assert len(dataset_with_dataset_model) == 10
+    assert len(dataset_with_experiment_model) == 10
+
+
+def test_dataset_load(test_project, dataset_instance, experiment_instance):
+    """Test loading datasets from storage."""
+    dataset_with_dataset_model = test_project.create_dataset(
+        name="dataset_with_dataset_model", 
+        model=DatasetModel
+    )
+    
+    # Only test with DatasetModel since ExperimentModel has MetricResult serialization issues
+    # Add some entries
+    for i in range(5):
+        dataset_with_dataset_model.append(dataset_instance)
+    
+    # Load from storage (this should work even if already loaded)
+    dataset_with_dataset_model.load()
+    
+    assert len(dataset_with_dataset_model) == 5
+
+
+def test_dataset_load_as_dicts(test_project, dataset_instance, experiment_instance):
+    """Test loading dataset entries as dictionaries."""
+    dataset_with_dataset_model = test_project.create_dataset(
+        name="dataset_with_dataset_model", 
+        model=DatasetModel
+    )
+    
+    dataset_with_dataset_model.append(dataset_instance)
+    
+    dicts = dataset_with_dataset_model.load_as_dicts()
+    
+    assert len(dicts) == 1
+    assert dicts[0]["id"] == 0
+    assert dicts[0]["name"] == "test"
+    assert dicts[0]["description"] == "test description"
+
+
+def test_dataset_to_pandas(test_project, experiment_instance):
+    """Test converting dataset to pandas DataFrame."""
+    dataset_with_experiment_model = test_project.create_dataset(
+        name="dataset_with_experiment_model", 
+        model=ExperimentModel
+    )
+    
+    for i in range(3):
+        dataset_with_experiment_model.append(experiment_instance)
+    
+    df = dataset_with_experiment_model.to_pandas()
+    
+    assert len(df) == 3
+    assert "id" in df.columns
+    assert "name" in df.columns
+    assert "tags" in df.columns
+    assert "result" in df.columns
+
+
+def test_dataset_save_entry(test_project, experiment_instance):
+    """Test saving changes to an entry."""
+    dataset_with_experiment_model = test_project.create_dataset(
+        name="dataset_with_experiment_model", 
+        model=ExperimentModel
+    )
+    
+    dataset_with_experiment_model.append(experiment_instance)
+    
+    # Get the entry and modify it
+    entry = dataset_with_experiment_model[0]
+    entry.name = "updated name"
+    
+    # Save the changes
+    dataset_with_experiment_model.save(entry)
+    
+    # Verify the change persisted
+    assert dataset_with_experiment_model[0].name == "updated name"
+
+
+def test_dataset_get_by_field(test_project, experiment_instance):
+    """Test getting entries by field value."""
+    dataset_with_experiment_model = test_project.create_dataset(
+        name="dataset_with_experiment_model", 
+        model=ExperimentModel
+    )
+    
+    dataset_with_experiment_model.append(experiment_instance)
+    
+    # Get the entry's row_id
+    entry = dataset_with_experiment_model[0]
+    row_id = entry._row_id
+    
+    # Find entry by row_id
+    found_entry = dataset_with_experiment_model.get(row_id)
+    
+    assert found_entry is not None
+    assert found_entry._row_id == row_id
+    assert found_entry.name == experiment_instance.name
+
+
+def test_dataset_iteration(test_project, dataset_instance):
+    """Test iterating over dataset entries."""
+    dataset_with_dataset_model = test_project.create_dataset(
+        name="dataset_with_dataset_model", 
+        model=DatasetModel
+    )
+    
+    # Add multiple entries
+    for i in range(3):
+        dataset_with_dataset_model.append(dataset_instance)
+    
+    # Test iteration
+    count = 0
+    for entry in dataset_with_dataset_model:
+        assert entry.name == "test"
+        count += 1
+    
+    assert count == 3
+
+
+def test_dataset_indexing(test_project, dataset_instance):
+    """Test accessing dataset entries by index."""
+    dataset_with_dataset_model = test_project.create_dataset(
+        name="dataset_with_dataset_model", 
+        model=DatasetModel
+    )
+    
+    dataset_with_dataset_model.append(dataset_instance)
+    
+    # Test indexing
+    first_entry = dataset_with_dataset_model[0]
+    assert first_entry.name == "test"
+    
+    # Test slicing
+    slice_dataset = dataset_with_dataset_model[0:1]
+    assert len(slice_dataset) == 1
\ No newline at end of file
diff --git a/experimental/tests/unit/test_llm.py b/experimental/tests/unit/test_llm.py
new file mode 100644
index 000000000..84aa0b179
--- /dev/null
+++ b/experimental/tests/unit/test_llm.py
@@ -0,0 +1,229 @@
+import pytest
+from unittest.mock import Mock
+from pydantic import BaseModel
+
+from ragas_experimental.llm.llm import RagasLLM, ragas_llm
+
+
+class LLMResponseModel(BaseModel):
+    response: str
+
+
+class MockClient:
+    """Mock client that simulates an LLM client."""
+    
+    def __init__(self, is_async=False):
+        self.is_async = is_async
+        self.chat = Mock()
+        self.chat.completions = Mock()
+        if is_async:
+            async def async_create(*args, **kwargs):
+                return LLMResponseModel(response="Mock response")
+            self.chat.completions.create = async_create
+        else:
+            def sync_create(*args, **kwargs):
+                return LLMResponseModel(response="Mock response")
+            self.chat.completions.create = sync_create
+
+
+class MockInstructor:
+    """Mock instructor client that wraps the base client."""
+    
+    def __init__(self, client):
+        self.client = client
+        self.chat = Mock()
+        self.chat.completions = Mock()
+        
+        if client.is_async:
+            # Async client - create a proper async function
+            async def async_create(*args, **kwargs):
+                return LLMResponseModel(response="Instructor response")
+            self.chat.completions.create = async_create
+        else:
+            # Sync client - create a regular function
+            def sync_create(*args, **kwargs):
+                return LLMResponseModel(response="Instructor response")
+            self.chat.completions.create = sync_create
+
+
+@pytest.fixture
+def mock_sync_client():
+    """Create a mock synchronous client."""
+    return MockClient(is_async=False)
+
+
+@pytest.fixture
+def mock_async_client():
+    """Create a mock asynchronous client.""" 
+    return MockClient(is_async=True)
+
+
+def test_ragas_llm_initialization(mock_sync_client, monkeypatch):
+    """Test RagasLLM initialization with different providers."""
+    # Mock instructor to return our mock instructor
+    def mock_from_openai(client):
+        return MockInstructor(client)
+    
+    monkeypatch.setattr('instructor.from_openai', mock_from_openai)
+    
+    llm = RagasLLM(
+        provider="openai",
+        model="gpt-4",
+        client=mock_sync_client
+    )
+    
+    assert llm.provider == "openai"
+    assert llm.model == "gpt-4"
+    assert llm.client is not None
+    assert not llm.is_async
+
+
+def test_ragas_llm_async_detection(mock_async_client, monkeypatch):
+    """Test that RagasLLM correctly detects async clients."""
+    # Mock instructor to return our mock instructor  
+    def mock_from_openai(client):
+        return MockInstructor(client)
+    
+    monkeypatch.setattr('instructor.from_openai', mock_from_openai)
+    
+    llm = RagasLLM(
+        provider="openai",
+        model="gpt-4",
+        client=mock_async_client
+    )
+    
+    assert llm.is_async
+
+
+def test_ragas_llm_factory_function(mock_sync_client, monkeypatch):
+    """Test the ragas_llm factory function."""
+    def mock_from_openai(client):
+        return MockInstructor(client)
+    
+    monkeypatch.setattr('instructor.from_openai', mock_from_openai)
+    
+    llm = ragas_llm(
+        provider="openai",
+        model="gpt-4",
+        client=mock_sync_client,
+        temperature=0.7
+    )
+    
+    assert isinstance(llm, RagasLLM)
+    assert llm.provider == "openai"
+    assert llm.model == "gpt-4"
+    assert llm.model_args.get("temperature") == 0.7
+
+
+def test_unsupported_provider():
+    """Test that unsupported providers raise ValueError."""
+    mock_client = Mock()
+    
+    with pytest.raises(ValueError, match="Unsupported provider: unsupported"):
+        RagasLLM(
+            provider="unsupported", 
+            model="test-model",
+            client=mock_client
+        )
+
+
+def test_sync_llm_generate(mock_sync_client, monkeypatch):
+    """Test sync LLM generation."""
+    def mock_from_openai(client):
+        return MockInstructor(client)
+    
+    monkeypatch.setattr('instructor.from_openai', mock_from_openai)
+    
+    llm = RagasLLM(
+        provider="openai",
+        model="gpt-4", 
+        client=mock_sync_client
+    )
+    
+    result = llm.generate("Test prompt", LLMResponseModel)
+    
+    assert isinstance(result, LLMResponseModel)
+    assert result.response == "Instructor response"
+
+
+@pytest.mark.asyncio
+async def test_async_llm_agenerate(mock_async_client, monkeypatch):
+    """Test async LLM generation."""
+    def mock_from_openai(client):
+        return MockInstructor(client)
+    
+    monkeypatch.setattr('instructor.from_openai', mock_from_openai)
+    
+    llm = RagasLLM(
+        provider="openai",
+        model="gpt-4",
+        client=mock_async_client
+    )
+    
+    result = await llm.agenerate("Test prompt", LLMResponseModel)
+    
+    assert isinstance(result, LLMResponseModel)
+    assert result.response == "Instructor response"
+
+
+def test_sync_client_agenerate_error(mock_sync_client, monkeypatch):
+    """Test that using agenerate with sync client raises TypeError."""
+    def mock_from_openai(client):
+        return MockInstructor(client)
+    
+    monkeypatch.setattr('instructor.from_openai', mock_from_openai)
+    
+    llm = RagasLLM(
+        provider="openai",
+        model="gpt-4",
+        client=mock_sync_client
+    )
+    
+    # Test that agenerate raises TypeError with sync client
+    with pytest.raises(TypeError, match="Cannot use agenerate\\(\\) with a synchronous client"):
+        # Use asyncio.run to handle the coroutine
+        import asyncio
+        asyncio.run(llm.agenerate("Test prompt", LLMResponseModel))
+
+
+def test_provider_support():
+    """Test that all expected providers are supported."""
+    supported_providers = ["openai", "anthropic", "cohere", "gemini", "litellm"]
+    
+    for provider in supported_providers:
+        mock_client = Mock()
+        
+        # Mock the appropriate instructor function
+        import instructor
+        mock_instructor_func = Mock(return_value=MockInstructor(mock_client))
+        setattr(instructor, f"from_{provider}", mock_instructor_func)
+        
+        # This should not raise an error
+        try:
+            llm = RagasLLM(provider=provider, model="test-model", client=mock_client)
+            assert llm.provider == provider
+        except Exception as e:
+            pytest.fail(f"Provider {provider} should be supported but got error: {e}")
+
+
+def test_llm_model_args_storage(mock_sync_client, monkeypatch):
+    """Test that model arguments are properly stored."""
+    def mock_from_openai(client):
+        return MockInstructor(client)
+    
+    monkeypatch.setattr('instructor.from_openai', mock_from_openai)
+    
+    model_args = {
+        "temperature": 0.7,
+        "max_tokens": 1000,
+        "top_p": 0.9
+    }
+    
+    llm = RagasLLM(
+        provider="openai",
+        model="gpt-4",
+        client=mock_sync_client,
+        **model_args
+    )
+    
+    assert llm.model_args == model_args
\ No newline at end of file
diff --git a/experimental/tests/unit/test_metric_base.py b/experimental/tests/unit/test_metric_base.py
new file mode 100644
index 000000000..6d067c4ab
--- /dev/null
+++ b/experimental/tests/unit/test_metric_base.py
@@ -0,0 +1,98 @@
+import typing as t
+from dataclasses import dataclass
+import pytest
+from pydantic import BaseModel
+
+from ragas_experimental.metric.base import Metric
+from ragas_experimental.metric import MetricResult
+
+
+class MetricResponseModel(BaseModel):
+    result: int
+    reason: t.Optional[str] = None
+
+
+@dataclass
+class CustomMetric(Metric):
+    """Custom metric implementation for testing."""
+
+    def __post_init__(self):
+        super().__post_init__()
+        self._response_model = MetricResponseModel
+
+
+@pytest.fixture
+def mock_llm(mock_llm):
+    """Use the mock LLM from conftest."""
+    return mock_llm
+
+
+def test_metric_creation():
+    """Test creating a custom metric."""
+    metric = CustomMetric(name="test_metric", prompt="What is the result of {input}?")
+
+    assert metric.name == "test_metric"
+    assert isinstance(metric.prompt, str) or hasattr(metric.prompt, "format")
+
+
+def test_metric_get_variables():
+    """Test extracting variables from prompt template."""
+    metric = CustomMetric(
+        name="test_metric",
+        prompt="Evaluate the {question} given the {context} and {answer}",
+    )
+
+    variables = metric.get_variables()
+    expected_vars = ["question", "context", "answer"]
+
+    assert set(variables) == set(expected_vars)
+
+
+def test_metric_score_single(mock_llm):
+    """Test scoring with a single input."""
+    metric = CustomMetric(name="test_metric", prompt="What is the result of {input}?")
+
+    # Mock the LLM to return a valid response
+    def mock_generate(prompt, response_model):
+        return response_model(result=1, reason="test reason")
+
+    mock_llm.generate = mock_generate
+
+    result = metric.score(llm=mock_llm, input="test")
+
+    assert isinstance(result, MetricResult)
+    assert result.traces is not None
+    assert "input" in result.traces
+
+
+@pytest.mark.asyncio
+async def test_metric_async_score(mock_llm):
+    """Test async scoring functionality."""
+    metric = CustomMetric(name="test_metric", prompt="What is the result of {input}?")
+
+    # Mock the async LLM method
+    async def mock_agenerate(prompt, response_model):
+        return response_model(result=1, reason="test reason")
+
+    mock_llm.agenerate = mock_agenerate
+
+    result = await metric.ascore(llm=mock_llm, input="test")
+
+    assert isinstance(result, MetricResult)
+    assert result.traces is not None
+
+
+def test_metric_response_model():
+    """Test that metric has correct response model."""
+    metric = CustomMetric(name="test_metric", prompt="What is the result of {input}?")
+
+    assert metric._response_model == MetricResponseModel
+
+
+def test_metric_prompt_conversion():
+    """Test that string prompts are converted to Prompt objects."""
+    metric = CustomMetric(name="test_metric", prompt="What is the result of {input}?")
+
+    # After __post_init__, prompt should be converted to Prompt object
+    assert hasattr(metric.prompt, "format")
+
diff --git a/experimental/tests/unit/test_project_core.py b/experimental/tests/unit/test_project_core.py
new file mode 100644
index 000000000..823a2f91b
--- /dev/null
+++ b/experimental/tests/unit/test_project_core.py
@@ -0,0 +1,108 @@
+import os
+import tempfile
+import pytest
+
+from ragas_experimental.project.core import Project
+
+
+def test_local_backend_creation():
+    """Test creating a project with local backend creates proper directory structure."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        local_project = Project.create(
+            name="test_local_project",
+            description="A test project using local backend",
+            backend="local/csv",
+            root_dir=temp_dir
+        )
+        
+        # Assert folder exists
+        assert os.path.exists(os.path.join(temp_dir, "test_local_project"))
+        assert os.path.exists(os.path.join(temp_dir, "test_local_project", "datasets"))
+        assert os.path.exists(os.path.join(temp_dir, "test_local_project", "experiments"))
+
+
+def test_local_backend_deletion():
+    """Test deleting a local backend project removes the directory."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        local_project = Project.create(
+            name="test_local_project",
+            description="A test project using local backend",
+            backend="local/csv",
+            root_dir=temp_dir
+        )
+        
+        project_path = os.path.join(temp_dir, "test_local_project")
+        assert os.path.exists(project_path)
+        
+        local_project.delete()
+        assert not os.path.exists(project_path)
+
+
+def test_project_get_existing():
+    """Test getting an existing project."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        # Create a project
+        local_project = Project.create(
+            name="test_local_project",
+            description="A test project using local backend",
+            backend="local/csv",
+            root_dir=temp_dir
+        )
+        
+        # Get the project
+        retrieved_project = Project.get(
+            name="test_local_project",
+            backend="local/csv",
+            root_dir=temp_dir
+        )
+        
+        assert retrieved_project.name == "test_local_project"
+        # Check backend type by checking if it's a LocalCSVProjectBackend
+        from ragas_experimental.project.backends.local_csv import LocalCSVProjectBackend
+        assert isinstance(retrieved_project._backend, LocalCSVProjectBackend)
+
+
+def test_project_get_nonexistent():
+    """Test getting a non-existent project raises ValueError."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        with pytest.raises(ValueError, match="Local project 'nonexistent' does not exist"):
+            Project.get(
+                name="nonexistent",
+                backend="local/csv",
+                root_dir=temp_dir
+            )
+
+
+def test_project_paths():
+    """Test dataset and experiment path generation."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        local_project = Project.create(
+            name="test_local_project",
+            description="A test project using local backend",
+            backend="local/csv",
+            root_dir=temp_dir
+        )
+        
+        # Test path generation
+        dataset_path = local_project.get_dataset_path("example_dataset")
+        experiment_path = local_project.get_experiment_path("example_experiment")
+        
+        expected_dataset_path = os.path.join(temp_dir, "test_local_project", "datasets", "example_dataset.csv")
+        expected_experiment_path = os.path.join(temp_dir, "test_local_project", "experiments", "example_experiment.csv")
+        
+        assert dataset_path == expected_dataset_path
+        assert experiment_path == expected_experiment_path
+
+
+def test_project_repr():
+    """Test project string representation."""
+    with tempfile.TemporaryDirectory() as temp_dir:
+        local_project = Project.create(
+            name="test_local_project",
+            description="A test project using local backend",
+            backend="local/csv",
+            root_dir=temp_dir
+        )
+        
+        assert "test_local_project" in str(local_project)
+        assert "local/csv" in str(local_project)
\ No newline at end of file
diff --git a/ragas/pyproject.toml b/ragas/pyproject.toml
index 573e47705..73377cd41 100644
--- a/ragas/pyproject.toml
+++ b/ragas/pyproject.toml
@@ -45,7 +45,6 @@ docs = [
 dev = [
     "rich",
     "ruff",
-    "isort",
     "black[jupyter]",
     "pyright",
     "llama_index",
@@ -63,25 +62,19 @@ dev = [
     "haystack-ai",
     "sacrebleu",
     "r2r",
-]
-test = [
     "pytest",
     "pytest-xdist[psutil]",
     "pytest-asyncio",
-    "llama_index",
     "nbmake",
 ]
+test = []
 [tool.setuptools]
 package-dir = {"" = "src"}
 
 [tool.setuptools.dynamic]
 readme = {file = ["README.md"], content-type = "text/markdown"}
 
-[tool.ruff]
-exclude = ["*.ipynb"]
-
-[tool.ruff.lint]
-ignore = ["E501"]
+# Ruff configuration is inherited from workspace.toml at the monorepo root
 
 [build-system]
 requires = ["setuptools>=64", "setuptools_scm>=8"]
diff --git a/ragas/src/ragas/embeddings/haystack_wrapper.py b/ragas/src/ragas/embeddings/haystack_wrapper.py
index 71ac1e978..4dc3501e9 100644
--- a/ragas/src/ragas/embeddings/haystack_wrapper.py
+++ b/ragas/src/ragas/embeddings/haystack_wrapper.py
@@ -37,10 +37,18 @@ def __init__(
         # Lazy Import of required Haystack components
         try:
             from haystack import AsyncPipeline
-            from haystack.components.embedders.azure_text_embedder import AzureOpenAITextEmbedder
-            from haystack.components.embedders.hugging_face_api_text_embedder import HuggingFaceAPITextEmbedder
-            from haystack.components.embedders.openai_text_embedder import OpenAITextEmbedder
-            from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder
+            from haystack.components.embedders.azure_text_embedder import (
+                AzureOpenAITextEmbedder,
+            )
+            from haystack.components.embedders.hugging_face_api_text_embedder import (
+                HuggingFaceAPITextEmbedder,
+            )
+            from haystack.components.embedders.openai_text_embedder import (
+                OpenAITextEmbedder,
+            )
+            from haystack.components.embedders.sentence_transformers_text_embedder import (
+                SentenceTransformersTextEmbedder,
+            )
         except ImportError as exc:
             raise ImportError(
                 "Haystack is not installed. Please install it with `pip install haystack-ai`."
@@ -94,10 +102,18 @@ async def aembed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]:
 
     def __repr__(self) -> str:
         try:
-            from haystack.components.embedders.azure_text_embedder import AzureOpenAITextEmbedder
-            from haystack.components.embedders.hugging_face_api_text_embedder import HuggingFaceAPITextEmbedder
-            from haystack.components.embedders.openai_text_embedder import OpenAITextEmbedder
-            from haystack.components.embedders.sentence_transformers_text_embedder import SentenceTransformersTextEmbedder
+            from haystack.components.embedders.azure_text_embedder import (
+                AzureOpenAITextEmbedder,
+            )
+            from haystack.components.embedders.hugging_face_api_text_embedder import (
+                HuggingFaceAPITextEmbedder,
+            )
+            from haystack.components.embedders.openai_text_embedder import (
+                OpenAITextEmbedder,
+            )
+            from haystack.components.embedders.sentence_transformers_text_embedder import (
+                SentenceTransformersTextEmbedder,
+            )
         except ImportError:
             return f"{self.__class__.__name__}(embeddings=Unknown(...))"
 
diff --git a/ragas/src/ragas/llms/haystack_wrapper.py b/ragas/src/ragas/llms/haystack_wrapper.py
index 0c92b3c9a..c31df42f1 100644
--- a/ragas/src/ragas/llms/haystack_wrapper.py
+++ b/ragas/src/ragas/llms/haystack_wrapper.py
@@ -39,8 +39,12 @@ def __init__(
         try:
             from haystack import AsyncPipeline
             from haystack.components.generators.azure import AzureOpenAIGenerator
-            from haystack.components.generators.hugging_face_api import HuggingFaceAPIGenerator
-            from haystack.components.generators.hugging_face_local import HuggingFaceLocalGenerator
+            from haystack.components.generators.hugging_face_api import (
+                HuggingFaceAPIGenerator,
+            )
+            from haystack.components.generators.hugging_face_local import (
+                HuggingFaceLocalGenerator,
+            )
             from haystack.components.generators.openai import OpenAIGenerator
         except ImportError as exc:
             raise ImportError(
@@ -115,8 +119,12 @@ async def agenerate_text(
     def __repr__(self) -> str:
         try:
             from haystack.components.generators.azure import AzureOpenAIGenerator
-            from haystack.components.generators.hugging_face_api import HuggingFaceAPIGenerator
-            from haystack.components.generators.hugging_face_local import HuggingFaceLocalGenerator
+            from haystack.components.generators.hugging_face_api import (
+                HuggingFaceAPIGenerator,
+            )
+            from haystack.components.generators.hugging_face_local import (
+                HuggingFaceLocalGenerator,
+            )
             from haystack.components.generators.openai import OpenAIGenerator
         except ImportError:
             return f"{self.__class__.__name__}(llm=Unknown(...))"
diff --git a/ragas/tests/unit/test_testset_schema.py b/ragas/tests/unit/test_testset_schema.py
index 5e7554156..84574b9da 100644
--- a/ragas/tests/unit/test_testset_schema.py
+++ b/ragas/tests/unit/test_testset_schema.py
@@ -6,8 +6,8 @@
     MultiTurnSample,
     SingleTurnSample,
 )
-from ragas.testset.synthesizers.testset_schema import Testset as RagasTestset
 from ragas.testset.synthesizers.testset_schema import (
+    Testset as RagasTestset,
     TestsetSample as RagasTestsetSample,
 )
 
diff --git a/workspace.toml b/workspace.toml
index e5c1bd262..2bf78a75b 100644
--- a/workspace.toml
+++ b/workspace.toml
@@ -1,15 +1,20 @@
 [workspace]
-# This file configures the workspace-wide tools and settings
-# Each project still maintains its own pyproject.toml for package-specific config
+# This file configures the workspace-wide tools and settings for the Ragas monorepo
+# All code quality tools (ruff, black, pyright) inherit from these centralized settings
+# Individual projects only override when absolutely necessary for package-specific needs
 
 [tool.ruff]
 select = ["E", "F", "I"]
 ignore = ["E501"]  # Line length handled by formatter
 line-length = 88
 target-version = "py39"
+exclude = ["*.ipynb"]  # Exclude Jupyter notebooks from linting
 
 [tool.ruff.lint.isort]
+# Import sorting configuration for the entire monorepo
 known-first-party = ["ragas", "ragas_experimental"]
+force-single-line = false
+combine-as-imports = true
 
 [tool.black]
 line-length = 88