diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2687b62..1930320 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -26,7 +26,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - + + - name: Install just + uses: extractions/setup-just@v2 + - name: Install uv uses: astral-sh/setup-uv@v3 with: @@ -34,17 +37,17 @@ jobs: cache-dependency-glob: | pyproject.toml *.py - + - name: Set up Python run: uv python install 3.12 - + - name: Install dependencies run: | uv venv --python 3.12 - make sync - + just sync + - name: Format code - run: make format + run: just format lint: needs: format @@ -52,7 +55,10 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - + + - name: Install just + uses: extractions/setup-just@v2 + - name: Install uv uses: astral-sh/setup-uv@v3 with: @@ -60,24 +66,27 @@ jobs: cache-dependency-glob: | pyproject.toml *.py - + - name: Set up Python run: uv python install 3.12 - + - name: Install dependencies run: | uv venv --python 3.12 - make sync - + just sync + - name: Run linters - run: make lint + run: just lint type-check: if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || (github.event_name == 'pull_request' && github.event.action != 'closed') runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - + + - name: Install just + uses: extractions/setup-just@v2 + - name: Install uv uses: astral-sh/setup-uv@v3 with: @@ -85,24 +94,27 @@ jobs: cache-dependency-glob: | pyproject.toml *.py - + - name: Set up Python run: uv python install 3.12 - + - name: Install dependencies run: | uv venv --python 3.12 - make sync - + just sync + - name: Run type checks - run: make type-check + run: just type-check test: if: (github.event_name == 'push' && github.ref == 'refs/heads/main') || (github.event_name == 'pull_request' && github.event.action != 'closed') runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - + + - name: Install just + uses: 
extractions/setup-just@v2 + - name: Install uv uses: astral-sh/setup-uv@v3 with: @@ -110,17 +122,17 @@ jobs: cache-dependency-glob: | pyproject.toml *.py - + - name: Set up Python run: uv python install 3.12 - + - name: Install dependencies run: | uv venv --python 3.12 - make sync - + just sync + - name: Run tests - run: make test-unit + run: just test-unit merged-test: if: github.event_name == 'pull_request' && github.event.action != 'closed' @@ -130,7 +142,10 @@ jobs: with: ref: ${{ github.event.pull_request.merge_commit_sha }} fetch-depth: 0 - + + - name: Install just + uses: extractions/setup-just@v2 + - name: Install uv uses: astral-sh/setup-uv@v3 with: @@ -138,17 +153,17 @@ jobs: cache-dependency-glob: | pyproject.toml *.py - + - name: Set up Python run: uv python install 3.12 - + - name: Install dependencies run: | uv venv --python 3.12 - make sync - + just sync + - name: Run tests - run: make test-unit + run: just test-unit release: needs: [format, lint, type-check, test] @@ -161,7 +176,10 @@ jobs: with: fetch-depth: 0 token: ${{ secrets.RELEASE_TOKEN }} - + + - name: Install just + uses: extractions/setup-just@v2 + - name: Install uv uses: astral-sh/setup-uv@v3 with: @@ -169,25 +187,25 @@ jobs: cache-dependency-glob: | pyproject.toml *.py - + - name: Set up Python run: uv python install 3.12 - + - name: Install dependencies run: | uv venv --python 3.12 - make sync + just sync uv pip install python-semantic-release - + - name: Configure Git run: | git config user.name "GitHub Actions" git config user.email "actions@github.com" - + - name: Bump version and publish release env: GH_TOKEN: ${{ secrets.RELEASE_TOKEN }} run: | uv run semantic-release version git push --follow-tags origin ${{ github.ref_name }} - uv run semantic-release publish \ No newline at end of file + uv run semantic-release publish diff --git a/.gitignore b/.gitignore index e628185..3a42264 100644 --- a/.gitignore +++ b/.gitignore @@ -268,3 +268,8 @@ TODO.md notes.md scratch/ .serena/ + +# 
Workflow files (introduce later) +workflows/PROJECT_INIT_WORKFLOW.md +context/PRODUCT.md +context/ENGINEERING.md diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 615a65b..d92d612 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,28 +3,28 @@ repos: hooks: - id: format-check name: Format code with ruff - entry: make format + entry: just format language: system stages: [pre-commit] verbose: true pass_filenames: false - id: lint-check name: Run ruff linter - entry: make lint + entry: just lint language: system stages: [pre-commit] verbose: true pass_filenames: false - id: type-check name: Run mypy type checker - entry: make type-check + entry: just type-check language: system stages: [pre-commit] verbose: true pass_filenames: false - id: unit-tests name: Run unit tests - entry: make test-unit + entry: just test-unit language: system stages: [pre-commit] verbose: true diff --git a/Makefile b/Makefile deleted file mode 100644 index 5ec7e06..0000000 --- a/Makefile +++ /dev/null @@ -1,136 +0,0 @@ -.PHONY: default help clean-project init clean-env sync format lint type-check test test-unit test-functional test-integration test-all validate-branch run - -GREEN_LINE=@echo "\033[0;32m--------------------------------------------------\033[0m" - -SOURCE_DIR = src/ -TEST_DIR = tests/ -PROJECT_VERSION := $(shell awk '/^\[project\]/ {flag=1; next} /^\[/{flag=0} flag && /^version/ {gsub(/"/, "", $$2); print $$2}' pyproject.toml) -PYTHON_VERSION := 3.12 - -default: help - -help: ## Display this help message - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-35s\033[0m %s\n", $$1, $$2}' - -# ---------------------------- -# Environment Management -# ---------------------------- - -init: ## Set up Python version, venv, and install dependencies - @echo "πŸ”§ Installing uv if missing..." - @if ! 
command -v uv >/dev/null 2>&1; then \ - echo "πŸ“¦ Installing uv..."; \ - python3 -m pip install --user --upgrade uv; \ - else \ - echo "βœ… uv is already installed"; \ - fi - @echo "🐍 Setting up Python $(PYTHON_VERSION) environment..." - uv python install $(PYTHON_VERSION) - uv venv --python $(PYTHON_VERSION) .venv - @echo "πŸ“¦ Installing project dependencies..." - uv sync --extra dev - . .venv/bin/activate && uv pip install -e . - @echo "πŸ”— Setting up pre-commit hooks..." - @if [ -f .pre-commit-config.yaml ]; then \ - uv run pre-commit install; \ - echo "βœ… Pre-commit hooks installed"; \ - else \ - echo "⚠️ No .pre-commit-config.yaml found, skipping pre-commit setup"; \ - fi - @echo "πŸŽ‰ Environment setup complete!" - -sync: ## Sync project dependencies - @echo "Syncing project dependencies..." - uv sync --extra dev - $(GREEN_LINE) - -clean-project: ## Clean Python caches and tooling artifacts - @echo "Cleaning project caches..." - find . -type d \( -name '.pytest_cache' -o -name '.ruff_cache' -o -name '.mypy_cache' -o -name '__pycache__' \) -exec rm -rf {} + - $(GREEN_LINE) - -clean-env: ## Remove the virtual environment folder - @echo "Deleting virtual environment..." - rm -rf .venv - $(GREEN_LINE) - - - -# ---------------------------- -# Code Quality -# ---------------------------- - -format: ## Format codebase using ruff - @echo "Formatting code with ruff..." - uv run ruff format - $(GREEN_LINE) - -lint: ## Lint code using ruff and autofix issues - @echo "Running lint checks with ruff..." - uv run ruff check . --fix - $(GREEN_LINE) - -type-check: ## Perform static type checks using mypy - @echo "Running type checks with mypy..." - uv run --extra dev mypy $(SOURCE_DIR) - $(GREEN_LINE) - -# ---------------------------- -# Tests -# ---------------------------- - -test-unit: ## Run unit tests with pytest - @echo "Running UNIT tests with pytest..." 
- uv run python -m pytest -vv --verbose -s $(TEST_DIR) - $(GREEN_LINE) - -test-functional: ## Run functional tests with pytest - @echo "Running FUNCTIONAL tests with pytest..." - uv run python -m pytest -m functional -vv --verbose -s $(TEST_DIR) - $(GREEN_LINE) - -test-integration: ## Run integration tests with pytest - @echo "Running INTEGRATION tests with pytest..." - uv run python -m pytest -m integration -vv --verbose -s $(TEST_DIR) - $(GREEN_LINE) - -test: ## Run standard tests with coverage report (excludes integration) - @echo "Running tests with pytest..." - uv run python -m pytest -m "not integration" -vv -s $(TEST_DIR) \ - --cov=src \ - --cov-config=pyproject.toml \ - --cov-fail-under=80 \ - --cov-report=term-missing - $(GREEN_LINE) - -test-all: ## Run all tests including integration tests - @echo "Running ALL tests with pytest..." - uv run python -m pytest -vv -s $(TEST_DIR) \ - --cov=src \ - --cov-config=pyproject.toml \ - --cov-fail-under=80 \ - --cov-report=term-missing - $(GREEN_LINE) - -# ---------------------------- -# Branch Validation -# ---------------------------- - -validate-branch: ## Run formatting, linting, type checks, and tests - @echo "πŸ” Running branch validation..." - $(MAKE) format - $(MAKE) lint - $(MAKE) type-check - $(MAKE) test - @echo "πŸŽ‰ Branch validation successful - ready for PR!" - $(GREEN_LINE) - -# ---------------------------- -# Run Application -# ---------------------------- - -run: ## Run the main application module - @echo "πŸš€ Running main application..." - uv run python -m src.main - $(GREEN_LINE) - diff --git a/README.md b/README.md index 6fb0530..a981669 100644 --- a/README.md +++ b/README.md @@ -1,192 +1,211 @@ -# AI Base Template: Production-First AI Engineering +# Python Agentic Template -> Based on [A Production-First Approach to AI Engineering](https://aienhancedengineer.substack.com/p/a-production-first-approach-to-ai) - a methodology for building reliable AI systems. +> Describe what you want to build. 
Let agents build it. -## 🎯 Why This Template? +Autonomous multi-agent Python project template. -**The Problem:** Most AI projects fail when moving from prototype to production. Research notebooks that work brilliantly in development fail catastrophically under real-world conditions—latency spikes, cost spirals, non-deterministic failures, and maintenance nightmares. +## Why This Template? -**The Root Cause:** The AI industry focuses 90% on model development and 10% on the infrastructure needed for production. This ratio should be reversed. Production AI systems require engineering discipline, not just algorithmic innovation. +**The Problem:** Starting AI/ML projects requires extensive setup—architecture decisions, project structure, testing patterns, CI/CD, logging, and more. Most developers copy-paste from old projects or spend days configuring from scratch. -**The Solution:** This template provides a production-ready foundation for AI projects, embodying the principle that *"Research optimizes for possibility. Engineering optimizes for reliability."* +**The Solution:** This template **bootstraps itself** into a complete, production-ready project through a multi-agent workflow. You describe your project in plain language; agents research, plan, and build it.
-## πŸ—οΈ What This Template Provides +## How It Works -A **modern Python foundation** designed for AI systems that need to work reliably in production: - -- **Modern Python Tooling** - Python 3.12+, FastAPI, Pydantic, type hints throughout -- **Production Logging** - Structured JSON logging with correlation tracking and dual-mode rendering -- **Development Automation** - Pre-configured linting, formatting, testing, and validation -- **Production-Ready Structure** - Organized for maintainability and scaling -- **Comprehensive Testing** - Unit, functional, and integration test patterns (21+ logging tests included) -- **CI/CD Ready** - GitHub Actions, pre-commit hooks, semantic versioning -- **Documentation Standards** - Clear guides for development and deployment - -This isn't another ML experiment templateβ€”it's an engineering foundation for AI systems that need to work reliably at scale. - -## ⚑ Quick Start - -```bash -# Clone the production-ready foundation -git clone my-ai-service -cd my-ai-service - -# Set up the complete development environment -make init - -# Verify everything works -make validate-branch +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ YOU: Fill context/PRODUCT.md + context/ENGINEERING.md β”‚ +β”‚ (Describe what you're building and technical preferences) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ PHASE 0: Agent Discovery β”‚ +β”‚ Claude Code finds available specialists and maps β”‚ +β”‚ them to roles 
(research, architecture, implementation,β”‚ +β”‚ review) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ PHASE 1: Research β”‚ +β”‚ Expands your seeds into full PRD β”‚ +β”‚ Researches best practices, grades evidence β”‚ +β”‚ β†’ context/PRD.md, context/RESEARCH_SYNTHESIS.md β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ [User Approval] +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ PHASE 2: Architecture β”‚ +β”‚ Creates ADRs and project plan β”‚ +β”‚ Defines MVP scope with MoSCoW prioritization β”‚ +β”‚ β†’ ADR.md, context/PROJECT_PLAN.md β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ [User Approval] +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ PHASE 3: MVP Implementation β”‚ +β”‚ Builds must-have features with tests β”‚ +β”‚ Per deliverable: IMPLEMENT β†’ REVIEW β†’ FIX β†’ PASS β”‚ +β”‚ Review enforces 80% coverage, test quality β”‚ +β”‚ β†’ Working code in src/ β”‚ 
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ [User Approval] +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ PHASE 4: Feature Enhancement β”‚ +β”‚ Adds features from roadmap (same validation loop) β”‚ +β”‚ Each feature: IMPLEMENT β†’ REVIEW β†’ FIX β†’ User Approvalβ”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ ``` -You now have a production-ready Python service foundation. Add your AI logic on top of this reliable base. +**Human-in-the-loop**: You approve each phase before continuing. No runaway automation. -## πŸ”§ The Production-First Philosophy +## Quick Start -### Research vs. Production Mindset +1. **Create your repository**: Click "Use this template" on GitHub +2. **Set up environment**: `just init` +3. **Fill out your seeds**: Edit `context/PRODUCT.md` and `context/ENGINEERING.md` +4. 
**Start brewing**: In Claude Code, say `"Run the project initialization workflow"` -**Research Approach:** -- Optimize for accuracy and novel algorithms -- Success = high F1 scores, paper publications -- Acceptable to fail fast and iterate -- Focus on the happy path +### Filling Out Seeds -**Production-First Approach:** -- Optimize for reliability and maintainability -- Success = uptime, cost efficiency, user satisfaction -- Must handle edge cases gracefully -- Plan for failure from the start +| File | What to Include | +|------|-----------------| +| `context/PRODUCT.md` | What you're building, for whom, why, success criteria | +| `context/ENGINEERING.md` | Technical preferences, constraints, architecture ideas | -### The 90/10 Rule +**Tips for better results**: +- Be specific about the problem: "Users waste 2 hours/day on X" > "Users have problems" +- Define success measurably: "50% reduction in Y" > "Improve Y" +- State constraints clearly: "Must run on GCP" > "Cloud deployment" -In production AI systems: -- **10%** of your code is the actual AI/ML logic -- **90%** is infrastructure: validation, monitoring, error handling, cost controls, testing +See `workflows/PROJECT_INIT_WORKFLOW.md` for the complete workflow specification. -This template provides that crucial 90% foundation. 
+## What You Get -## 🛠️ Development Workflow +Beyond the autonomous workflow, this template provides a **production-ready foundation**: -### Essential Commands +### Modern Python Tooling +- Python 3.12+, FastAPI, Pydantic +- Type hints throughout +- uv for fast dependency management -```bash -# Environment management -make init # Complete development setup -make sync # Update dependencies -make clean-env # Reset environment +### Production Logging +- Structured JSON logging with structlog +- Correlation ID tracking across requests +- Dual-mode: human-readable (dev) / JSON (prod) -# Code quality -make format # Auto-format code -make lint # Fix linting issues -make type-check # Validate type hints -make validate-branch # Run all checks before committing +### Development Automation +- Pre-configured linting and formatting (Ruff), type checking (mypy) +- Pre-commit hooks for quality gates +- `just validate-branch` runs all checks -# Testing -make test # Standard test suite -make test-unit # Fast unit tests -make test-functional # Feature tests -make test-integration # Integration tests -make test-all # Complete test suite -``` +### Testing Patterns +- Unit, functional, and integration test structure +- pytest with markers for test organization +- 21+ logging system tests included as examples + +### CI/CD Ready +- GitHub Actions workflows +- Semantic versioning +- Docker-ready structure -### Project Structure +## Project Structure ``` -ai-base-template/ -├── src/ # Your service code goes here -│ ├── __init__.py -│ ├── main.py # Simple starting point with logging integration -│ └── logging.py # Production structured logging system -├── tests/ # Comprehensive test suite -│ ├── test_main.py # Example test patterns -│ └── test_logging.py # 21+ logging system tests -├── research/ # Notebooks and experiments -│ └── EDA.ipynb # Exploratory work stays here -├── Makefile # All automation commands -├──
pyproject.toml # Modern Python configuration -└── CLAUDE.md # Detailed development guide +my-project/ +β”œβ”€β”€ context/ # Project seeds + workflow outputs +β”‚ β”œβ”€β”€ PRODUCT.md # Your product requirements (seed) +β”‚ β”œβ”€β”€ ENGINEERING.md # Your technical preferences (seed) +β”‚ β”œβ”€β”€ PRD.md # Expanded PRD (generated) +β”‚ β”œβ”€β”€ RESEARCH_SYNTHESIS.md # Research findings (generated) +β”‚ └── PROJECT_PLAN.md # MVP scope + roadmap (generated) +β”œβ”€β”€ workflows/ # Autonomous workflow system +β”‚ β”œβ”€β”€ PROJECT_INIT_WORKFLOW.md # Complete workflow specification +β”‚ └── templates/ # Output format contracts +β”œβ”€β”€ src/ # Your service code +β”‚ β”œβ”€β”€ __init__.py +β”‚ β”œβ”€β”€ main.py # Entry point with logging demo +β”‚ └── logging.py # Production logging system +β”œβ”€β”€ tests/ # Test suite +β”‚ β”œβ”€β”€ test_main.py +β”‚ └── test_logging.py # 21+ logging tests +β”œβ”€β”€ research/ # Notebooks and experiments +β”œβ”€β”€ ADR.md # Architecture decisions (generated) +β”œβ”€β”€ justfile # All automation commands +└── pyproject.toml # Project configuration ``` -## πŸŽ“ Who Should Use This Template +## Development Commands -### Senior Engineers New to AI -Start with a solid engineering foundation while learning AI concepts. The template provides the safety rails you're accustomed to in production systems. +```bash +just # Show all available commands -### AI Engineers Moving to Production -Stop reinventing infrastructure. Focus on your models while using battle-tested patterns for the production wrapper. +# Environment +just init # Complete development setup +just sync # Update dependencies +just clean-env # Reset environment -### Technical Leaders -Give your team a consistent, production-ready starting point that embodies engineering best practices from day one. 
+# Code Quality +just format # Auto-format code +just lint # Fix linting issues +just type-check # Validate type hints +just validate-branch # Run all checks (required before commits) -## 📊 Production Logging System +# Testing +just test # Standard test suite +just test-unit # Fast unit tests +just test-functional # Feature tests +just test-integration # Integration tests +just test-all # Complete test suite +``` -This template includes a **production-grade structured logging system** built with structlog that handles the observability requirements of real-world AI systems. +## The Production-First Philosophy -### Dual-Mode Logging +This template embodies the principle that **production AI requires engineering discipline**: -**Development Mode** - Human-readable format optimized for local debugging: -```bash -22:45:00 [INFO] api.handlers: Processing request [status_code=200, duration_ms=150, user_id=user-123] [id:req-abc1] -``` +- **90% infrastructure, 10% model code**: Most production AI is validation, monitoring, error handling, and cost controls—not algorithms +- **Reliability over novelty**: Production systems must work consistently, not just impressively +- **Plan for failure**: Every external call needs error handling; every assumption needs validation -**Production Mode** - Structured JSON for monitoring and analytics: -```json -{ - "timestamp": "2025-08-31T22:45:00.123Z", - "level": "info", - "logger": "src.api.handlers", - "message": "Processing request", - "context": "default", - "extra": { - "status_code": 200, - "duration_ms": 150, - "user_id": "user-123", - "correlation_id": "req-abc-123" - } -} -``` +The autonomous workflow ensures these patterns are built in from the start, not bolted on later.
-### Key Capabilities +## Who Should Use This -- **Correlation ID Tracking** - Automatically trace requests across your entire system -- **Context Isolation** - Prevent data leakage between concurrent requests and operations -- **Smart Field Organization** - Separates standard fields from custom data for optimal readability -- **Environment-Driven Configuration** - Dynamic log levels and format switching via environment variables -- **Edge Case Handling** - Graceful handling of long values, special characters, and null data +### Teams Starting AI/ML Projects +Stop reinventing infrastructure. Describe your project and let agents build a production-ready foundation. -### Usage Example +### Senior Engineers New to AI +Get the safety rails you're accustomed to in production systems while learning AI concepts. -See `src/main.py` for a complete demonstration of the logging system in action, including context binding, multi-function logging, and both development and production formatting modes. +### Technical Leaders +Give your team a consistent, production-ready starting point that embodies engineering best practices. 
-## 📚 Learn More +## Learn More -### Core Methodology -- [A Production-First Approach to AI Engineering](https://aienhancedengineer.substack.com/p/a-production-first-approach-to-ai) - The article that inspired this template +### This Template +- `workflows/PROJECT_INIT_WORKFLOW.md` - Complete workflow specification +- `workflows/templates/` - Output format examples ### Production AI Engineering -- [Google's Rules for ML](https://developers.google.com/machine-learning/guides/rules-of-ml) - Engineering discipline for ML systems -- [Hidden Technical Debt in ML Systems](https://papers.nips.cc/paper/5656-hidden-technical-debt-in-machine-learning-systems.pdf) - Foundational NIPS paper +- [A Production-First Approach to AI Engineering](https://aienhancedengineer.substack.com/p/a-production-first-approach-to-ai) +- [Google's Rules for ML](https://developers.google.com/machine-learning/guides/rules-of-ml) +- [Hidden Technical Debt in ML Systems](https://papers.nips.cc/paper/5656-hidden-technical-debt-in-machine-learning-systems.pdf) -### Technologies Used +### Technologies - [FastAPI](https://fastapi.tiangolo.com/) - Modern Python web framework -- [Pydantic](https://docs.pydantic.dev/) - Data validation using type annotations -- [structlog](https://www.structlog.org/) - Structured logging for production systems -- [uv](https://docs.astral.sh/uv/) - Modern Python package management - -## 🤝 Contributing +- [Pydantic](https://docs.pydantic.dev/) - Data validation +- [structlog](https://www.structlog.org/) - Structured logging +- [uv](https://docs.astral.sh/uv/) - Fast Python package management -This template embodies battle-tested patterns from production AI systems. When contributing, prioritize: +## Contributing +When contributing, prioritize: 1. **Reliability over features** 2. **Simplicity over cleverness** 3. **Documentation over assumptions** 4. **Tests over trust** -## 📄 License +## License -Apache License 2.0 - See [LICENSE](LICENSE) file for details.
+Apache License 2.0 - See [LICENSE](LICENSE) file. --- -**Remember:** The hardest part of AI isn't the algorithmsβ€”it's making them work reliably in production. This template gives you a head start on that challenge. - -*"The best AI is the AI that works."* +*"Describe what you want. Let agents build it."* diff --git a/justfile b/justfile new file mode 100644 index 0000000..42cb717 --- /dev/null +++ b/justfile @@ -0,0 +1,146 @@ +# Project task runner +# Run `just` or `just --list` to see available recipes + +set dotenv-load := false +set shell := ["bash", "-cu"] + +# Configuration +python_version := "3.12" +source_dir := "src/" +test_dir := "tests/" + + +# Display available recipes +default: + @just --list + +# ---------------------------- +# Environment Management +# ---------------------------- + +# Set up Python version, venv, and install dependencies +init: + #!/usr/bin/env bash + set -euo pipefail + echo "πŸ”§ Installing uv if missing..." + if ! command -v uv >/dev/null 2>&1; then + echo "πŸ“¦ Installing uv..." + python3 -m pip install --user --upgrade uv + else + echo "βœ… uv is already installed" + fi + echo "🐍 Setting up Python {{ python_version }} environment..." + uv python install {{ python_version }} + uv venv --python {{ python_version }} .venv + echo "πŸ“¦ Installing project dependencies..." + uv sync --extra dev + . .venv/bin/activate && uv pip install -e . + echo "πŸ”— Setting up pre-commit hooks..." + if [ -f .pre-commit-config.yaml ]; then + uv run pre-commit install + echo "βœ… Pre-commit hooks installed" + else + echo "⚠️ No .pre-commit-config.yaml found, skipping pre-commit setup" + fi + echo "πŸŽ‰ Environment setup complete!" + +# Sync project dependencies +sync: + @echo "Syncing project dependencies..." + uv sync --extra dev + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# Clean Python caches and tooling artifacts +clean-project: + @echo "Cleaning project caches..." + find . 
-type d \( -name '.pytest_cache' -o -name '.ruff_cache' -o -name '.mypy_cache' -o -name '__pycache__' \) -exec rm -rf {} + + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# Remove the virtual environment folder +clean-env: + @echo "Deleting virtual environment..." + rm -rf .venv + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# ---------------------------- +# Code Quality +# ---------------------------- + +# Format codebase using ruff +format: + @echo "Formatting code with ruff..." + uv run ruff format + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# Lint code using ruff and autofix issues +lint: + @echo "Running lint checks with ruff..." + uv run ruff check . --fix + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# Perform static type checks using mypy +type-check: + @echo "Running type checks with mypy..." + uv run --extra dev mypy {{ source_dir }} + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# ---------------------------- +# Tests +# ---------------------------- + +# Run unit tests with pytest +test-unit: + @echo "Running UNIT tests with pytest..." + uv run python -m pytest -vv --verbose -s {{ test_dir }} + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# Run functional tests with pytest +test-functional: + @echo "Running FUNCTIONAL tests with pytest..." + uv run python -m pytest -m functional -vv --verbose -s {{ test_dir }} + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# Run integration tests with pytest +test-integration: + @echo "Running INTEGRATION tests with pytest..." 
+ uv run python -m pytest -m integration -vv --verbose -s {{ test_dir }} + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# Run standard tests with coverage report (excludes integration) +test: + @echo "Running tests with pytest..." + uv run python -m pytest -m "not integration" -vv -s {{ test_dir }} \ + --cov=src \ + --cov-config=pyproject.toml \ + --cov-fail-under=80 \ + --cov-report=term-missing + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# Run all tests including integration tests +test-all: + @echo "Running ALL tests with pytest..." + uv run python -m pytest -vv -s {{ test_dir }} \ + --cov=src \ + --cov-config=pyproject.toml \ + --cov-fail-under=80 \ + --cov-report=term-missing + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# ---------------------------- +# Branch Validation +# ---------------------------- + +# Run formatting, linting, type checks, and tests +validate-branch: format lint type-check test + @echo "πŸŽ‰ Branch validation successful - ready for PR!" + @printf '\033[0;32m--------------------------------------------------\033[0m\n' + +# ---------------------------- +# Run Application +# ---------------------------- + +# Run the main application module +run: + @echo "πŸš€ Running main application..." 
+ uv run python -m src.main + @printf '\033[0;32m--------------------------------------------------\033[0m\n' diff --git a/pyproject.toml b/pyproject.toml index 6e41757..8b96f9a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,27 +1,13 @@ [project] -name = "ai-base-template" -version = "0.4.0" -description = "Production-first AI engineering template - Reliable infrastructure for deploying AI systems at scale" +name = "python-agentic-template" +version = "0.1.0" +description = "Autonomous multi-agent Python project template - Describe what you want, let agents build it" authors = [{name="Leopoldo Garcia Vargas", email="lk13.dev@gmail.com"}] requires-python = ">=3.12" license = {text = "Apache-2.0"} dependencies = [ - # API framework - "fastapi>=0.95.0", - "uvicorn[standard]>=0.22.0", - # Data science and ML - # "numpy>=1.25.0", - # "pandas>=2.1.0", - # "scikit-learn>=1.3.0", - # ML models - # "xgboost>=1.7.0", - # "lightgbm>=4.0.0", - # "torch>=2.1.0", - # Utilities - # "shap>=0.48.0", + # Core utilities "python-dotenv>=1.0.0", - "loguru>=0.7.0", - # Core dependencies "pydantic>=2.11.5", "pydantic-settings>=2.9.1", "structlog>=25.4.0", diff --git a/src/__init__.py b/src/__init__.py index c3647b3..5282c0e 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -1,3 +1,3 @@ -"""AI Flora Mind - Python AI Package""" +"""Python Agentic Template - Autonomous multi-agent Python project template""" -__version__ = "0.3.0" +__version__ = "0.1.0" diff --git a/src/main.py b/src/main.py deleted file mode 100644 index 933c8a4..0000000 --- a/src/main.py +++ /dev/null @@ -1,42 +0,0 @@ -"""Main module for AI Base Template service.""" - -from .logging import configure_structlog, get_logger - -# Configure logging -# Note: testing=True enables human-readable format, testing=False uses JSON format -configure_structlog(testing=True) -logger = get_logger(__name__) - - -def hello_world() -> str: - logger.info("hello_world function called") - result = "Hello from AI Base Template!" 
- logger.info("hello_world function returning result", result=result) - return result - - -def get_version() -> str: - logger.info("get_version function called") - from . import __version__ - - logger.info("Version retrieved", version=__version__) - return __version__ - - -def main() -> None: - """Main entry point to demonstrate logging functionality.""" - logger.info("Application starting") - - # Test hello_world function - greeting = hello_world() - logger.info("Received greeting", greeting=greeting) - - # Test get_version function - version = get_version() - logger.info("Application version check complete", version=version) - - logger.info("Application finished successfully") - - -if __name__ == "__main__": - main() diff --git a/tests/test_main.py b/tests/test_main.py deleted file mode 100644 index c2b96d0..0000000 --- a/tests/test_main.py +++ /dev/null @@ -1,34 +0,0 @@ -"""Test module for AI Base Template main functionality""" - -import pytest - -from src import __version__ -from src.main import get_version, hello_world - - -def test_hello_world(): - result = hello_world() - assert result == "Hello from AI Base Template!" - assert isinstance(result, str) - - -def test_get_version(): - version = get_version() - assert version == "0.3.0" - assert isinstance(version, str) - - -@pytest.mark.unit -def test_hello_world_unit(): - assert hello_world() == "Hello from AI Base Template!" - - -@pytest.mark.functional -def test_package_functionality(): - """Functional test for basic package functionality.""" - # Test that we can import and use the package - assert __version__ == "0.3.0" - - # Test main functions work - assert hello_world() == "Hello from AI Base Template!" 
- assert get_version() == "0.3.0" diff --git a/workflows/PROJECT_INIT_WORKFLOW.md b/workflows/PROJECT_INIT_WORKFLOW.md new file mode 100644 index 0000000..7f0590e --- /dev/null +++ b/workflows/PROJECT_INIT_WORKFLOW.md @@ -0,0 +1,628 @@ +# Project Initialization Workflow + +> **Runtime**: Claude Code +> **Recommended Orchestrator**: Opus (for multi-agent coordination) +> **Sub-agents**: Sonnet or Haiku (for specialized tasks) + +Transform this template into a project-specific system through agent-orchestrated phases with user approval gates. + +## Overview + +| Aspect | Description | +|--------|-------------| +| **Purpose** | Transform template into project-specific system | +| **Pattern** | Sequential agent orchestration with incremental complexity | +| **Approval** | User approval required at each phase gate | + +## Agent Role Definitions + +This workflow uses **abstract roles** instead of specific agent names for portability. During Phase 0, Claude Code discovers available specialists and maps them to these roles. 
+ +| Role | Capability | Phase | Example Agents | +|------|------------|-------|----------------| +| `research` | Web research, evidence synthesis, context gathering | 1 | context-engineer, researcher | +| `architecture` | ADRs, system design, MVP scoping | 2 | project-architect, ml-systems-architect | +| `implementation` | Code writing, testing, domain expertise | 3-4 | ai-engineer, backend-engineer, data-engineer, agentic-engineer | +| `review` | Test enforcement, code quality auditing | 3-4 | python-code-quality-auditor | + +**Role Matching Keywords**: + +| Role | Keywords in Agent Description | +|------|-------------------------------| +| research | research, context, synthesis, investigation, analysis | +| architecture | architect, design, planning, ADR, system, infrastructure | +| implementation | engineer, developer, implement, code, build | +| review | review, audit, quality, test, enforce, validate, check, gate | + +**Note**: The `implementation` role supports **multiple specialists** - different agents can be assigned to different features based on domain expertise. + +--- + +## Discovered Agents + +*This section is updated in place during Phase 0.* + +| Role | Selected Agent(s) | Source | Status | +|------|-------------------|--------|--------| +| research | [pending discovery] | - | ⏳ | +| architecture | [pending discovery] | - | ⏳ | +| implementation | [pending discovery] | - | ⏳ | +| review | [pending discovery] | - | ⏳ | + +--- + +## Phase 0: Agent Discovery + +Before starting the workflow, Claude Code discovers available specialists and maps them to roles. + +**Actions**: +1. Scan for agents at: + - Project level: `.claude/agents/*.md` + - User level: `~/.claude/agents/*.md` +2. Read agent descriptions and match to roles using keywords +3. Present candidates to user for approval +4.
Update "Discovered Agents" section above with selections + +**User Approval**: + +Claude Code presents discovered agents: +``` +Found specialists for this workflow: + +research: context-engineer (user) - "Web research and evidence synthesis" +architecture: project-architect (user) - "ADRs and system design" +implementation: ai-engineer (user) - "RAG and LLM integration" + backend-engineer (user) - "APIs and databases" +review: python-code-quality-auditor (user) - "Code quality and test enforcement" + +Approve these selections? [Y/n/modify] +``` + +**If No Matches Found**: + +``` +⚠️ WARNING: No specialist found for role [research]. + Workflow will proceed with generalist Claude Code. + Consider creating a specialist agent for better results. +``` + +**After Discovery**: + +Update the "Discovered Agents" table above, then proceed to Prerequisites. + +--- + +## Inter-Phase Communication + +Templates in `workflows/templates/` serve as **structured contracts** between phases: + +| Template | Output Location | Produced By | Consumed By | +|----------|-----------------|-------------|-------------| +| `TEMPLATE-PRD.md` | `context/PRD.md` | `research` role (Phase 1) | `architecture` role (Phase 2) | +| `TEMPLATE-ADR.md` | `ADR.md` (root) | `architecture` role (Phase 2) | `implementation` role (Phase 3) | +| `TEMPLATE-PROJECT-PLAN.md` | `context/PROJECT_PLAN.md` | `architecture` role (Phase 2) | `implementation` role (Phase 3-4) | + +Templates remain in `workflows/templates/` as reusable scaffolds. + +**Why templates matter:** +- **Predictable handoffs**: Each phase knows exactly what format to expect +- **Agent independence**: Agents don't need to negotiate formats +- **Human readability**: User can review outputs at each approval gate + +--- + +## Prerequisites + +Before running this workflow: + +1. 
**Fill out seed documents** in `context/`: + - `context/PRODUCT.md` - Product/business perspective (what & why) + - `context/ENGINEERING.md` - Technical perspective (how & constraints) + +2. **Run Phase 0**: Discover and confirm available specialists (or accept generalist fallback) + +3. **Support files** in `workflows/`: + - Templates for agent outputs + - Agent specifications + +--- + +## Phase 1: Context Research + +**Role**: `research` +**Agent**: *[See Discovered Agents table]* + +**Input**: +- `context/PRODUCT.md` - User-provided product seed +- `context/ENGINEERING.md` - User-provided engineering seed + +**Actions**: +1. Read user-provided seeds +2. Research technologies, patterns, best practices +3. Grade evidence quality (High/Medium/Low confidence) +4. Expand seeds into full PRD using `workflows/templates/TEMPLATE-PRD.md` +5. Synthesize findings into structured document + +**Output**: +- `context/PRD.md` - Expanded PRD based on product seed +- `context/RESEARCH_SYNTHESIS.md` - Research synthesis + +**Success Criteria**: Research synthesis with sourced, graded claims + +### [APPROVAL GATE 1] + +User reviews: +- Expanded PRD in `context/PRD.md` +- Research synthesis document + +**Options**: +- ✅ **Approve** - Proceed to Phase 2 +- 🔄 **Request more research** - Specify areas needing deeper investigation +- ✏️ **Modify scope** - Adjust seeds and re-run + +--- + +## Phase 2: Architecture Planning + +**Role**: `architecture` +**Agent**: *[See Discovered Agents table]* + +**Input**: +- `context/PRD.md` - Expanded PRD from Phase 1 +- Research synthesis from Phase 1 +- `context/ENGINEERING.md` - Technical preferences +- Template skeleton (pyproject.toml, ADR.md, README.md, src/) + +**Actions**: +1. Create ADRs using `workflows/templates/TEMPLATE-ADR.md` +2. Define MVP scope using MoSCoW prioritization +3. Plan incremental feature roadmap using `workflows/templates/TEMPLATE-PROJECT-PLAN.md` +4.
Customize template (pyproject.toml, README.md) + +**Output**: +- `ADR.md` - Overwritten with project-specific decisions +- `context/PROJECT_PLAN.md` - MVP scope + feature roadmap + +**Success Criteria**: Clear MVP definition, actionable roadmap + +### [APPROVAL GATE 2] + +User reviews: +- Architecture decisions in `ADR.md` +- MVP scope and feature roadmap in `context/PROJECT_PLAN.md` + +**Options**: +- ✅ **Approve** - Proceed to Phase 3 +- 🔧 **Adjust scope** - Modify MVP boundaries +- 📊 **Change priorities** - Reorder feature roadmap + +--- + +## Phase 3: MVP Implementation + +**Roles**: `implementation` + `review` +**Agents**: *[See Discovered Agents table - may use multiple specialists]* + +Phase 3 uses a structured **Implement → Review → Fix** loop for each deliverable to enforce test creation and code quality. + +Select implementation agent based on project type: + +| Project Type | Recommended Specialty | +|--------------|----------------------| +| RAG, embeddings, LLM integration | AI/ML specialist | +| APIs, databases, DDD | Backend specialist | +| Autonomous agents | Agentic specialist | +| ETL, data pipelines | Data specialist | + +**Input**: +- Approved `ADR.md` and `context/PROJECT_PLAN.md` from Phase 2 +- Customized template skeleton + +--- + +### 3.1 Implementation Sub-phase + +**Role**: `implementation` + +**Actions**: +1. Implement one deliverable (feature/module) from PROJECT_PLAN.md +2. Write unit tests following naming convention: `test____` +3. Ensure tests are behavioral (test outcomes, not mocks) +4. Run `just test` locally to verify tests pass +5. Mark deliverable as "Ready for Review" + +**Test Requirements**: +- Every new module must have corresponding unit tests +- Tests must follow naming convention from CLAUDE.md +- Minimum 80% coverage for new code +- No tautological tests (testing mocks, assignment, or nothing) + +--- + +### 3.2 Review Sub-phase + +**Role**: `review` (read-only) + +**Actions**: +1.
Verify tests exist for all new implementation files +2. Check test naming conventions (`test____`) +3. Detect tautological tests (testing mocks instead of behavior) +4. Verify coverage >= 80% for new code +5. Check for hallucinated packages (imports that don't exist) +6. Scan for security vulnerabilities (bare except, SQL injection) +7. Generate review report + +**Blocking Criteria** (must fix before progression): +- [ ] Tests exist for all new implementation files +- [ ] Tests follow `test____` naming convention +- [ ] No tautological tests detected +- [ ] Coverage >= 80% for new code +- [ ] No hallucinated package imports +- [ ] No security vulnerabilities + +**Review Report Format**: +``` +## Test & Quality Review Report +**Deliverable**: [name] +**Status**: PASS | FAIL +**Coverage**: X% (required: 80%) +**Review Cycle**: N of 3 + +### Blocking Issues (must fix) +### Warnings (should fix) +### Passed Checks +### Remediation Required +``` + +--- + +### 3.3 Fix Sub-phase (If Review Fails) + +**Role**: `implementation` + +**Actions**: +1. Address each blocking issue from remediation list +2. Add missing tests if required +3. Fix tautological tests to be behavioral +4. Re-run `just test` locally +5. Return to 3.2 for re-review + +**Loop Control**: +- Maximum **3 fix cycles** per deliverable +- If still failing after 3 cycles → **escalate to user** + +--- + +### Validation Loop Diagram + +``` +PER DELIVERABLE: + +┌─────────────────┐ +│ IMPLEMENT │ ← implementation role (code + tests) +└────────┬────────┘ + │ + v +┌─────────────────┐ +│ REVIEW │ ← review role (read-only) +└────────┬────────┘ + │ + ┌────┴────┐ + │ PASS?
│ + └────┬────┘ + Yes │ No + │ │ + ┌────┘ └────┐ + v v +┌──────┐ ┌──────┐ +│ DONE │ │ FIX │ ← implementation role +└──────┘ └──┬───┘ + │ + ┌────┴────┐ + │Cycle < 3│ + └────┬────┘ + Yes │ No + │ │ + (back to REVIEW) → ESCALATE TO USER +``` + +--- + +### Escalation Behavior + +If a deliverable fails review 3 times: + +1. **Generate Escalation Report** with: + - All persistent blocking issues + - Summary of attempted fixes across cycles + - Recommendations for resolution + +2. **Halt workflow** and present to user + +3. **User Options**: + - ⚠️ **Override** - Proceed anyway (document risk in PR) + - 🔧 **Manual fix** - User addresses issues directly + - ⏭️ **Descope** - Move feature to Phase 4 or backlog + - ✂️ **Split** - Break into smaller deliverables + +--- + +### Deliverable Tracking + +| Deliverable | Status | Cycle | Blocking Issues | Owner | +|-------------|--------|-------|-----------------|-------| +| *[Feature 1]* | Implementing | 0 | - | implementation | +| *[Feature 2]* | In Review | 1 | Missing tests for X | review | +| *[Feature 3]* | Passed | 2 | - | - | + +--- + +**Output**: Working MVP with tests (all deliverables passed review) + +**Success Criteria**: +- All deliverables pass review +- `just validate-branch` passes +- Coverage >= 80% for new code + +### [APPROVAL GATE 3] + +User validates: +- MVP functionality (end-to-end) +- All deliverables passed review loop +- Test coverage meets requirements + +**Options**: +- ✅ **Approve** - Proceed to Phase 4 +- 🐛 **Request fixes** - Specify additional issues to address +- ✏️ **Adjust scope** - Modify MVP boundaries + +--- + +## Phase 4: Iterative Enhancement + +**Roles**: `implementation` + `review` +**Agents**: *[Same specialists from Phase 3, or different per feature]* + +Phase 4 follows the same **Implement → Review →
Fix** loop as Phase 3 for each feature in the roadmap. + +**Input**: +- Working MVP from Phase 3 +- Feature roadmap from `context/PROJECT_PLAN.md` + +--- + +### 4.1 Feature Implementation + +**Role**: `implementation` + +**Actions**: +1. Select next feature from PROJECT_PLAN.md roadmap (by priority) +2. Implement feature with unit tests +3. Follow same test requirements as Phase 3: + - Tests for all new modules + - `test____` naming + - 80% coverage for new code + - Behavioral tests (not tautological) +4. Run `just test` locally +5. Mark feature as "Ready for Review" + +--- + +### 4.2 Feature Review + +**Role**: `review` (read-only) + +**Actions**: +1. Run same validation as Phase 3.2 (tests, naming, coverage, quality) +2. Generate review report +3. If PASS → proceed to user approval +4. If FAIL → implementation agent fixes (max 3 cycles) + +--- + +### 4.3 Feature Approval + +### [APPROVAL GATE per feature] + +After review passes, user validates: +- Feature functionality +- Review report shows PASS +- Integration with existing MVP + +**Options**: +- ✅ **Approve** - Add next feature from roadmap +- 🔄 **Request changes** - Modify current feature +- ⏭️ **Skip to next** - Move to next roadmap item +- 🛑 **Stop** - End enhancement phase + +--- + +**Output**: Enhanced system with additional features + +**Success Criteria**: +- Each feature passes review loop +- `just validate-branch` passes +- Coverage maintained >= 80% + +--- + +## Folder Structure + +``` +project-root/ +├── context/ # User seeds + workflow outputs +│ ├── README.md # Instructions for seeds +│ ├── PRODUCT.md # User seed: product/business perspective +│ ├── ENGINEERING.md # User seed: technical perspective +│ ├── PRD.md # Phase 1 output: expanded PRD +│ └── PROJECT_PLAN.md # Phase 2 output: MVP scope + roadmap +├── workflows/ # Workflow support files +│ ├── PROJECT_INIT_WORKFLOW.md # This file +│ ├── QUICK-START.md # Step-by-step
guide +│ ├── README.md # Template docs +│ └── templates/ # Output templates +│ ├── TEMPLATE-PRD.md +│ ├── TEMPLATE-ADR.md +│ └── TEMPLATE-PROJECT-PLAN.md +├── ADR.md # Overwritten in Phase 2 +└── src/ # Implementation (Phase 3-4) +``` + +--- + +## Artifact Locations + +| Artifact | Location | Created By | +|----------|----------|------------| +| User seeds | `context/PRODUCT.md`, `context/ENGINEERING.md` | **User** (before workflow) | +| Output templates | `workflows/templates/` | System (part of template) | +| Expanded PRD | `context/PRD.md` | `research` role (Phase 1) | +| Research synthesis | `context/RESEARCH_SYNTHESIS.md` | `research` role (Phase 1) | +| Architecture decisions | `ADR.md` | `architecture` role (Phase 2) | +| Project plan | `context/PROJECT_PLAN.md` | `architecture` role (Phase 2) | +| Implementation | `src/*.py` | `implementation` role (Phase 3-4) | + +--- + +## Invocation Examples + +**Step 0** - Fill out seeds: +``` +Edit context/PRODUCT.md and context/ENGINEERING.md with your project details +``` + +**Phase 0** - Discover agents: +``` +Run the project initialization workflow +``` +Claude Code will scan for specialists, present them, and update this file. + +**Phase 1** - Start context research: +``` +Research context for this project using [discovered research agent] +``` + +**Phase 2** - Plan architecture: +``` +Plan architecture for this project using [discovered architecture agent] +``` + +**Phase 3** - Implement MVP: +``` +Implement MVP using [discovered implementation agent(s)] +``` + +**Phase 4** - Add features: +``` +Add [feature name] from the roadmap using [appropriate implementation agent] +``` + +--- + +## Template Usage + +Templates in `workflows/templates/` define output formats. Agents use these to produce consistent documents.
+ +### PRD Template → `context/PRD.md` + +**Used by**: `research` role (Phase 1) + +**Key sections**: +- Problem Statement (from user seeds) +- Objectives (from success criteria) +- User Stories with acceptance criteria +- Constraints (from engineering seed) + +### ADR Template → `ADR.md` + +**Used by**: `architecture` role (Phase 2) + +**Typical decisions**: +- ADR-001: Database choice +- ADR-002: Web framework +- ADR-003: Authentication +- ADR-004: Deployment +- ADR-005: State management + +**Each ADR must have**: Context, Decision, Consequences, Alternatives + +### Project Plan Template → `context/PROJECT_PLAN.md` + +**Used by**: `architecture` role (Phase 2) + +**Key sections**: +- Overview table with project details +- Phase status tracking +- MVP Scope (Must-Have features only) +- Feature Roadmap (Should/Could/Won't) + +--- + +## Troubleshooting + +### "Agent doesn't understand requirements" + +**Cause**: User description too vague or context missing + +**Fix**: +- Provide more specific problem statement +- Describe target users explicitly +- Include examples of desired functionality +- State constraints clearly + +### "ADR doesn't match expectations" + +**Cause**: PRD unclear or research missed key requirements + +**Fix**: +- Review PRD - is problem statement clear? +- Provide direct feedback on what to change +- Edit ADR directly if easier + +### "MVP scope too large/small" + +**Cause**: Unclear boundaries in PRD or misaligned priorities + +**Fix**: +- Review Must-Have vs. Should-Have in PRD +- Explicitly state what's NOT in MVP +- Adjust PROJECT_PLAN.md scope directly + +### "Feature implementation diverges from plan" + +**Cause**: Agent didn't read context or context unclear + +**Fix**: +- Ensure ADR has "Accepted" status +- Ask agent to re-read specific context file +- Update context if requirements changed + +--- + +## Agent Best Practices + +1. **Read all context before acting**: Don't skip to implementation +2.
**Ask clarifying questions**: Don't guess user intent +3. **One phase at a time**: Don't jump ahead +4. **Follow templates exactly**: Consistency matters +5. **Cross-reference liberally**: Link to sources and related docs + +--- + +## Workflow Refinement + +After each workflow execution, update this file to: + +- [ ] Refine phase instructions based on learnings +- [ ] Add project-type-specific guidance +- [ ] Improve success criteria +- [ ] Document common issues and solutions + +### Refinement Log + +| Date | Change | Reason | +|------|--------|--------| +| *Initial* | Created workflow | Template adaptation needs | +| 2025-11-28 | Reorganized: context/ for seeds, workflows/ for support | Clearer separation of user inputs vs. system files | +| 2025-11-28 | Added Phase 0 agent discovery, abstract roles | Workflow portability - users may have different specialists | +| 2025-11-28 | Added `review` role with Implement → Review → Fix loop | Enforce test creation and code quality before progression | diff --git a/workflows/templates/TEMPLATE-ADR.md b/workflows/templates/TEMPLATE-ADR.md new file mode 100644 index 0000000..92af24b --- /dev/null +++ b/workflows/templates/TEMPLATE-ADR.md @@ -0,0 +1,361 @@ +# Architecture Decision Records + +--- +**Status**: Active +**Created**: [YYYY-MM-DD] +**Last Updated**: [YYYY-MM-DD] +--- + +This document captures significant architectural decisions made during the project. Each decision is recorded with context, rationale, and consequences.
+ +**Format**: Each ADR follows the structure: +- **Title**: Short noun phrase +- **Status**: Proposed | Accepted | Deprecated | Superseded +- **Date**: When decision was made +- **Context**: Problem or requirement +- **Decision**: What was chosen +- **Consequences**: Positive, negative, and risks +- **Alternatives**: Other options considered + +--- + +## ADR-001: [Decision Title] + +**Status**: Accepted +**Date**: [YYYY-MM-DD] +**Deciders**: [Roles/names] + +### Context + +[2-3 paragraphs describing]: +- What problem or requirement drove this decision? +- What constraints affected the choice? +- What was the existing situation (if applicable)? + +Example: +> The application needs to persist user data, research findings, and project metadata. Requirements include ACID transactions for user operations, JSON field support for flexible research metadata storage, and full-text search for knowledge retrieval. The team has strong PostgreSQL experience but limited NoSQL expertise. Budget allows for managed database services. + +### Decision + +[Clear statement of what was chosen] + +Example: +> We will use **PostgreSQL 15+** as the primary database for all application data. 
+ +**Key implementation details**: +- [Specific approach point 1] +- [Specific approach point 2] +- [Specific approach point 3] + +Example: +- Use `jsonb` columns for research metadata (flexible schema) +- Implement full-text search with GIN indexes on document content +- Deploy via managed service (AWS RDS or GCP Cloud SQL) +- Use connection pooling (pgbouncer) for efficiency + +### Consequences + +#### Positive +- [Benefit 1] +- [Benefit 2] +- [Benefit 3] + +Example: +- Native JSON support eliminates need for separate document store +- ACID guarantees ensure data consistency for critical operations +- Strong ecosystem (ORMs, migration tools, monitoring) +- Team expertise reduces ramp-up time + +#### Negative +- [Trade-off 1] +- [Trade-off 2] + +Example: +- More complex operations than managed NoSQL (manual backups, tuning) +- Vertical scaling limits (plan for read replicas if traffic grows) +- Higher baseline cost than serverless databases + +#### Risks +- [Risk 1 and mitigation] +- [Risk 2 and mitigation] + +Example: +- **Risk**: Database becomes bottleneck at scale + - **Mitigation**: Implement read replicas and caching (Redis) early +- **Risk**: Team unfamiliar with PostgreSQL-specific optimizations + - **Mitigation**: Allocate 2 weeks for training and performance testing + +### Alternatives Considered + +#### Alternative 1: [Name] +- **Pros**: [Benefits] +- **Cons**: [Drawbacks] +- **Why rejected**: [Reason] + +Example: +#### Alternative 1: MongoDB (Document Store) +- **Pros**: Flexible schema, horizontal scaling, native JSON +- **Cons**: No ACID transactions (until v4.2), team unfamiliarity, complex aggregation queries +- **Why rejected**: ACID requirements and team expertise outweigh schema flexibility benefits + +#### Alternative 2: [Name] +- **Pros**: [Benefits] +- **Cons**: [Drawbacks] +- **Why rejected**: [Reason] + +--- + +## ADR-002: [Decision Title] + +**Status**: Accepted +**Date**: [YYYY-MM-DD] +**Deciders**: [Roles/names] + +### Context + 
+[Problem description] + +Example: +> The application requires a web API for client interactions and background task processing for long-running research jobs. Performance requirements include <100ms response time for 95% of API requests and support for concurrent processing of multiple research tasks. + +### Decision + +Example: +> We will use **FastAPI** (Python 3.11+) with **async/await** for the web framework. + +**Key implementation details**: +- Async endpoints for I/O-bound operations (database, external APIs) +- Background tasks via Celery with Redis message broker +- Pydantic models for request/response validation +- OpenAPI auto-generation for API documentation + +### Consequences + +#### Positive +- Native async support (better concurrency than Flask/Django) +- Automatic request validation and serialization (Pydantic) +- Modern Python features (type hints, async) +- Fast development with auto-generated docs + +#### Negative +- Smaller ecosystem than Flask/Django (fewer plugins) +- Team needs to learn async patterns +- Debugging async code more complex + +#### Risks +- **Risk**: Async code introduces race conditions + - **Mitigation**: Use async-safe libraries, comprehensive testing +- **Risk**: CPU-bound tasks block event loop + - **Mitigation**: Offload to Celery workers, not async endpoints + +### Alternatives Considered + +#### Alternative 1: Django REST Framework +- **Pros**: Batteries-included, large ecosystem, team familiarity +- **Cons**: Not async-native (ASGI support limited), heavier than needed +- **Why rejected**: Performance requirements favor async-first framework + +#### Alternative 2: Flask with async extensions +- **Pros**: Lightweight, team familiarity, large ecosystem +- **Cons**: Async support bolted-on (not native), manual validation +- **Why rejected**: FastAPI provides better DX for async and validation + +--- + +## ADR-003: [Decision Title - Authentication] + +**Status**: Accepted +**Date**: [YYYY-MM-DD] +**Deciders**: 
[Roles/names] + +### Context + +[Problem description] + +Example: +> Application requires user authentication for personalized research storage and multi-user collaboration. Must support both web UI and API access. Security requirements include password hashing, session management, and rate limiting. + +### Decision + +Example: +> We will use **OAuth 2.0 with JWT tokens** for authentication and **role-based access control (RBAC)** for authorization. + +**Key implementation details**: +- JWT tokens with 1-hour expiration, refresh tokens with 7-day expiration +- Password hashing via bcrypt (cost factor 12) +- Social login (Google, GitHub) via OAuth providers +- Roles: Admin, Researcher, Viewer + +### Consequences + +[Fill in positive, negative, risks as above] + +### Alternatives Considered + +[Fill in alternatives as above] + +--- + +## ADR-004: [Decision Title - Deployment] + +**Status**: Proposed +**Date**: [YYYY-MM-DD] +**Deciders**: [Roles/names] + +### Context + +Example: +> Application needs to be deployed with high availability, automatic scaling, and minimal operational overhead. Budget allows for managed services. Team prefers infrastructure-as-code for reproducibility. 
+ +### Decision + +Example: +> We will deploy on **Google Cloud Platform (GCP)** using: +- Cloud Run for application containers (auto-scaling) +- Cloud SQL for PostgreSQL (managed database) +- Cloud Storage for file uploads +- Terraform for infrastructure provisioning + +**Key implementation details**: +- Docker multi-stage builds for optimized images +- CI/CD via GitHub Actions +- Staging and production environments +- Cost alerts at $500/month threshold + +### Consequences + +[Fill in as above] + +### Alternatives Considered + +#### Alternative 1: AWS (ECS + RDS + S3) +- **Pros**: Broader service catalog, more community resources +- **Cons**: More complex pricing, steeper learning curve +- **Why rejected**: Team familiarity with GCP, simpler pricing model + +#### Alternative 2: Heroku +- **Pros**: Simplest deployment, zero-config +- **Cons**: Higher cost at scale, less control +- **Why rejected**: Cost becomes prohibitive beyond hobby tier + +--- + +## ADR-005: [Decision Title - State Management] + +**Status**: Accepted +**Date**: [YYYY-MM-DD] +**Deciders**: [Roles/names] + +### Context + +[Problem description for state management, caching, session storage] + +### Decision + +[Your choice - Redis, in-memory, database sessions, etc.] + +### Consequences + +[Fill in as above] + +### Alternatives Considered + +[Fill in as above] + +--- + +## Template for New ADRs + +Copy this template when adding a new decision: + +```markdown +## ADR-XXX: [Short Descriptive Title] + +**Status**: Proposed | Accepted | Deprecated | Superseded +**Date**: YYYY-MM-DD +**Deciders**: [Roles/names] + +### Context +[What problem requires a decision? What constraints exist?] + +### Decision +[What is being decided?] 
+ +**Key implementation details**: +- [Detail 1] +- [Detail 2] + +### Consequences + +#### Positive +- [Benefit] + +#### Negative +- [Trade-off] + +#### Risks +- **Risk**: [Description] + - **Mitigation**: [How we'll address it] + +### Alternatives Considered + +#### Alternative 1: [Name] +- **Pros**: [Benefits] +- **Cons**: [Drawbacks] +- **Why rejected**: [Reason] +``` + +--- + +## ADR Status Definitions + +| Status | Meaning | Agent Behavior | +|--------|---------|----------------| +| **Proposed** | Under consideration, not yet approved | Consider but don't enforce | +| **Accepted** | Approved and active | **Mandatory** - must follow this decision | +| **Deprecated** | No longer recommended but not forbidden | Avoid in new code, flag in reviews | +| **Superseded** | Replaced by newer decision | **Actively avoid** - use new decision instead | + +--- + +## Decision Index + +Quick reference for all decisions: + +| ADR | Title | Status | Date | +|-----|-------|--------|------| +| 001 | [Database Choice] | Accepted | [YYYY-MM-DD] | +| 002 | [Web Framework] | Accepted | [YYYY-MM-DD] | +| 003 | [Authentication] | Accepted | [YYYY-MM-DD] | +| 004 | [Deployment Platform] | Proposed | [YYYY-MM-DD] | +| 005 | [State Management] | Accepted | [YYYY-MM-DD] | + +--- + +## Notes for AI Agents + +**When reading this document**: +- Apply **Accepted** decisions as mandatory constraints +- Avoid **Deprecated** and **Superseded** approaches +- Consider **Proposed** decisions but confirm with user before implementing +- Check alternatives if requirements don't fit accepted decision + +**When adding a new ADR**: +- Assign next sequential number (ADR-XXX) +- Set status to "Proposed" initially +- Include at least 2 alternatives considered +- Update Decision Index table at bottom +- Add date to document's "Last Updated" field + +**Common ADR Topics**: +- Database technology +- Web framework +- Authentication/authorization +- API design (REST vs. 
GraphQL) +- Frontend framework (if applicable) +- Deployment platform +- CI/CD approach +- Monitoring/logging strategy +- Testing strategy +- State management +- Error handling approach diff --git a/workflows/templates/TEMPLATE-PRD.md b/workflows/templates/TEMPLATE-PRD.md new file mode 100644 index 0000000..74e0b69 --- /dev/null +++ b/workflows/templates/TEMPLATE-PRD.md @@ -0,0 +1,303 @@ +# Product Requirements Document: [Project Name] + +--- +**Status**: Draft +**Created**: [YYYY-MM-DD] +**Last Updated**: [YYYY-MM-DD] +**Owner**: [Role/Name] +**Version**: 0.1 +--- + +## Problem Statement + +**2-3 sentences describing**: +- Who is affected: [target users/stakeholders] +- What problem exists: [core pain point] +- Impact if not solved: [business/user consequence] + +Example: +> Software developers spend 40% of their time context-switching between tools when researching technical decisions. This fragmentation leads to inconsistent documentation and repeated research across team members. Without a centralized knowledge system, teams lose productivity and struggle to onboard new members effectively. + +--- + +## Objectives & Key Results (OKRs) + +### Business Objective +[What is the desired outcome?] + +Example: Enable teams to make faster, better-informed technical decisions. + +### Key Results + +1. **[Measurable indicator 1]**: [Target metric] + - Baseline: [Current state] + - Goal: [Target state] + +2. **[Measurable indicator 2]**: [Target metric] + - Baseline: [Current state] + - Goal: [Target state] + +3. **[Measurable indicator 3]**: [Target metric] + - Baseline: [Current state] + - Goal: [Target state] + +Example: +1. **Research time reduction**: 40% → 20% of developer time + - Baseline: 16 hours/week per developer + - Goal: 8 hours/week per developer + +2.
**Documentation consistency**: 30% → 80% of decisions documented + - Baseline: 3 of 10 decisions have written rationale + - Goal: 8 of 10 decisions have written rationale + +--- + +## User Stories + +Use format: "As a [user type], I want [capability] so that [benefit]" + +### Core User Stories (Must-Have) + +#### US-001: [Story Title] +**As a** [user role] +**I want** [specific capability] +**So that** [business value/benefit] + +**Acceptance Criteria**: +- [ ] [Testable condition 1] +- [ ] [Testable condition 2] +- [ ] [Testable condition 3] + +**Dependencies**: [Other stories, external systems, or "None"] + +**Priority**: Must-Have + +--- + +#### US-002: [Story Title] +**As a** [user role] +**I want** [specific capability] +**So that** [business value/benefit] + +**Acceptance Criteria**: +- [ ] [Testable condition 1] +- [ ] [Testable condition 2] + +**Dependencies**: [Other stories or "None"] + +**Priority**: Must-Have + +--- + +### Additional User Stories (Should-Have / Could-Have) + +#### US-003: [Story Title] +**Priority**: Should-Have + +[Same format as above] + +--- + +## Solution Requirements + +### High-Level Approach + +[1-2 paragraphs describing the proposed solution architecture] + +Example: +> The system will consist of a web-based interface for research input and a command-line tool for developer workflows. A vector database will store research findings with metadata for efficient retrieval. AI agents will synthesize findings and suggest architectural decisions based on project context. + +### Functional Requirements + +#### Must-Have (MVP) + +1. **[Feature 1]**: [Description] + - [Sub-requirement 1.1] + - [Sub-requirement 1.2] + +2. **[Feature 2]**: [Description] + - [Sub-requirement 2.1] + - [Sub-requirement 2.2] + +3. **[Feature 3]**: [Description] + +Example: +1.
**Research Collection**: System must capture web research findings + - Extract content from URLs (docs, blogs, GitHub) + - Grade evidence quality (High/Medium/Low) + - Store with source attribution and timestamps + +2. **Knowledge Synthesis**: System must consolidate findings across sources + - Deduplicate information + - Resolve contradictions + - Identify gaps in coverage + +#### Should-Have (Post-MVP Priority) + +1. **[Feature]**: [Description] +2. **[Feature]**: [Description] + +#### Could-Have (Nice to Have) + +1. **[Feature]**: [Description] +2. **[Feature]**: [Description] + +#### Won't-Have (Out of Scope for V1) + +1. **[Feature]**: [Reason for exclusion] +2. **[Feature]**: [Reason for exclusion] + +### Non-Functional Requirements + +#### Performance +- **[Metric]**: [Target] + - Example: **Response time**: < 2 seconds for 95th percentile queries + +#### Scalability +- **[Metric]**: [Target] + - Example: **Concurrent users**: Support 100 simultaneous users + +#### Security +- **[Requirement]**: [Description] + - Example: **Authentication**: OAuth 2.0 with role-based access control + +#### Reliability +- **[Metric]**: [Target] + - Example: **Uptime**: 99.5% availability (excluding planned maintenance) + +#### Usability +- **[Requirement]**: [Description] + - Example: **Learning curve**: New users productive within 30 minutes + +--- + +## Acceptance Criteria (Overall) + +**The project is successful when**: + +- [ ] [Measurable criterion 1] +- [ ] [Measurable criterion 2] +- [ ] [Measurable criterion 3] +- [ ] [Measurable criterion 4] + +Example: +- [ ] 80% of user stories marked "Must-Have" are implemented and tested +- [ ] End-to-end workflow (research → synthesis → decision) completes without errors +- [ ] Documentation coverage > 90% (all public APIs documented) +- [ ] `just validate-branch` passes (all tests, type checks, linting) + +--- + +## Constraints & Assumptions + +### Technical Constraints + +1.
**[Constraint]**: [Description and impact] + - Example: **Python 3.11+ only**: Leverages modern type hints and performance improvements + +2. **[Constraint]**: [Description and impact] + - Example: **OpenAI API required**: No offline mode in V1 + +### Business Constraints + +1. **[Constraint]**: [Description and impact] + - Example: **Budget**: $500/month max for API costs + +2. **[Constraint]**: [Description and impact] + - Example: **Timeline**: MVP must launch within 8 weeks + +### Assumptions + +1. **[Assumption]**: [What we're assuming is true] + - Example: **User expertise**: Users have basic command-line proficiency + +2. **[Assumption]**: [What we're assuming is true] + - Example: **Data availability**: Research sources (docs, blogs) remain accessible + +--- + +## Success Metrics + +**How we'll measure success post-launch**: + +| Metric | Target | Measurement Method | +|--------|--------|-------------------| +| [Metric name] | [Target value] | [How measured] | +| User adoption | 20 active users within 1 month | Analytics dashboard | +| Research quality | 90% of syntheses rated "helpful" | User survey | +| Time savings | 50% reduction in research time | Before/after comparison | + +--- + +## User Personas (Optional) + +### Persona 1: [Name/Role] + +**Background**: [Brief description] + +**Goals**: +- [Goal 1] +- [Goal 2] + +**Pain Points**: +- [Pain point 1] +- [Pain point 2] + +**How This Project Helps**: [1-2 sentences] + +--- + +### Persona 2: [Name/Role] + +[Same structure as above] + +--- + +## Open Questions + +**Unresolved issues that need clarification**: + +1. **[Question]**: [Description] + - **Blocker?**: Yes/No + - **Owner**: [Who will resolve] + - **Target date**: [YYYY-MM-DD] + +2. 
**[Question]**: [Description] + - **Blocker?**: Yes/No + - **Owner**: [Who will resolve] + +--- + +## Appendix + +### Related Documents +- [Architecture Decisions](../architecture/ADR.md) +- [Project Plan](../planning/PROJECT_PLAN.md) +- [Research Synthesis](./RESEARCH_SYNTHESIS.md) + +### Glossary +- **[Term]**: [Definition] +- **[Term]**: [Definition] + +### Change Log + +| Date | Change | Author | +|------|--------|--------| +| [YYYY-MM-DD] | Initial draft created | [Name] | +| [YYYY-MM-DD] | Added US-005 based on user feedback | [Name] | + +--- + +## Notes for AI Agents + +**When reading this PRD**: +- Focus on "Must-Have" requirements for MVP scope +- Prioritize user stories with no dependencies first +- Flag ambiguous acceptance criteria before implementation +- Cross-reference with ADR.md for architectural constraints + +**When updating this PRD**: +- Add date and reason to Change Log +- Update version number (increment by 0.1 for minor, 1.0 for major) +- Notify team if changing "Must-Have" scope +- Preserve existing user story IDs (don't renumber) diff --git a/workflows/templates/TEMPLATE-PROJECT-PLAN.md b/workflows/templates/TEMPLATE-PROJECT-PLAN.md new file mode 100644 index 0000000..b0f66e9 --- /dev/null +++ b/workflows/templates/TEMPLATE-PROJECT-PLAN.md @@ -0,0 +1,418 @@ +# Project Plan: [Project Name] + +--- +**Status**: Active +**Created**: [YYYY-MM-DD] +**Last Updated**: [YYYY-MM-DD] +**Current Phase**: Phase 1 - Context Research +--- + +## Overview + +| Aspect | Details | +|--------|---------| +| **Project Goal** | [1 sentence describing end state] | +| **Target Users** | [Primary user personas] | +| **Success Criteria** | [How we measure success] | +| **Timeline** | [Estimated duration or target date] | +| **Team Size** | [Number of contributors] | + +Example: +| Aspect | Details | +|--------|---------| +| **Project Goal** | Enable developers to capture, synthesize, and retrieve research findings efficiently | +| **Target Users** | Software engineers, technical leads,
architects | +| **Success Criteria** | 50% reduction in research time, 80% of decisions documented | +| **Timeline** | 8 weeks to MVP, 4 weeks for enhancement phase | +| **Team Size** | 2 developers + 1 PM | + +--- + +## Phase 1: Context Research + +**Status**: [Not Started | In Progress | Complete | Blocked] +**Owner**: context-engineer +**Duration**: [Estimated time] + +### Objectives + +1. [Objective 1] +2. [Objective 2] +3. [Objective 3] + +Example: +1. Understand technical landscape for chosen domain +2. Identify best practices and anti-patterns +3. Create structured context for architecture planning + +### Deliverables + +- [ ] `context/` folder structure created +- [ ] `context/product/PRD.md` drafted (based on user input) +- [ ] `context/research/technology-comparison.md` completed +- [ ] `context/research/best-practices.md` completed +- [ ] `~/.claude/context/research/{project}-synthesis.md` written + +### Success Criteria + +- [ ] Research synthesis includes 5+ sources with evidence grades +- [ ] PRD has complete "Must-Have" user stories +- [ ] All claims in research docs are sourced +- [ ] User approves context structure and content + +### Blockers & Risks + +| Issue | Impact | Mitigation | Owner | +|-------|--------|------------|-------| +| [Blocker/risk] | High/Med/Low | [How to address] | [Who] | + +Example: +| Issue | Impact | Mitigation | Owner | +|-------|--------|------------|-------| +| Insufficient user requirements | High | Schedule clarification meeting | PM | +| Technology rapidly evolving | Medium | Focus on stable core, flag experimental | context-engineer | + +--- + +## Phase 2: Architecture Planning + +**Status**: [Not Started | In Progress | Complete | Blocked] +**Owner**: project-architect +**Duration**: [Estimated time] + +### Objectives + +1. [Objective 1] +2. [Objective 2] + +Example: +1. Make and document key architectural decisions +2. Define clear MVP scope with feature roadmap +3. 
Prepare template skeleton for implementation + +### Deliverables + +- [ ] `context/architecture/ADR.md` with 3-5 decisions +- [ ] `context/planning/PROJECT_PLAN.md` (this file) completed +- [ ] `pyproject.toml` updated with dependencies +- [ ] `README.md` customized for project +- [ ] `src/` folder structure created (if applicable) + +### Success Criteria + +- [ ] Each ADR follows standard format (Context/Decision/Consequences) +- [ ] MVP scope clearly separates Must/Should/Could/Won't +- [ ] Roadmap has at least 5 prioritized features +- [ ] User approves architecture and plan + +### Dependencies + +- **Requires**: Phase 1 complete (approved PRD and research) +- **Blocks**: Phase 3 (cannot start implementation without plan) + +### Blockers & Risks + +[Same table format as Phase 1] + +--- + +## Phase 3: MVP Implementation + +**Status**: [Not Started | In Progress | Complete | Blocked] +**Owner**: [domain-specialist - select based on project type] +**Duration**: [Estimated time] + +### Objectives + +1. Implement all "Must-Have" features +2. Create end-to-end working system +3. 
Establish testing and validation pipeline + +### MVP Scope + +#### Must-Have Features (In Scope) + +| Feature | Description | Acceptance Criteria | Status | +|---------|-------------|---------------------|--------| +| [Feature 1] | [Brief description] | [How to test] | Not Started | +| [Feature 2] | [Brief description] | [How to test] | Not Started | +| [Feature 3] | [Brief description] | [How to test] | Not Started | + +Example: +| Feature | Description | Acceptance Criteria | Status | +|---------|-------------|---------------------|--------| +| User Auth | Email/password login | User can register, login, logout | Not Started | +| Research Capture | Save web findings with metadata | User can add URL, extract content, save to DB | Not Started | +| Search | Full-text search of findings | User can search and get ranked results | Not Started | + +#### Out of Scope (MVP) + +**These features are intentionally excluded from MVP**: +- [Feature] - [Reason for exclusion] +- [Feature] - [Reason for exclusion] + +Example: +- Social login (Google/GitHub) - Adds complexity without validating core value +- Real-time collaboration - Not needed for single-user validation +- Mobile app - Web-first approach, mobile later if needed + +### Deliverables + +- [ ] All Must-Have features implemented +- [ ] Unit tests for core functionality (>70% coverage) +- [ ] Integration tests for end-to-end flows +- [ ] Documentation updated (README, API docs) +- [ ] `just validate-branch` passes + +### Success Criteria + +- [ ] End-to-end user workflow completes without errors +- [ ] All acceptance criteria met for Must-Have features +- [ ] Test suite passes (`just test`) +- [ ] Type checking passes (`just type-check`) +- [ ] Linting passes (`just lint`) +- [ ] User validates MVP functionality + +### Dependencies + +- **Requires**: Phase 2 complete (approved architecture and plan) +- **Blocks**: Phase 4 (cannot enhance without working MVP) + +### Blockers & Risks + +[Same table format as Phase 1] + 
+--- + +## Phase 4: Iterative Enhancement + +**Status**: [Not Started | In Progress | Complete | Blocked] +**Owner**: [same domain-specialist from Phase 3] +**Duration**: [Estimated time or "Ongoing"] + +### Objectives + +1. Incrementally add features from roadmap +2. Incorporate user feedback +3. Maintain quality as system grows + +### Feature Roadmap + +Features prioritized using **MoSCoW**: +- **Must**: Critical for initial release +- **Should**: Important but not critical +- **Could**: Nice to have if time permits +- **Won't**: Out of scope for V1 + +| Priority | Feature | Description | Dependencies | Status | +|----------|---------|-------------|--------------|--------| +| Must | [Feature] | [Brief desc] | [Other features] | Not Started | +| Should | [Feature] | [Brief desc] | [Other features] | Not Started | +| Should | [Feature] | [Brief desc] | [Other features] | Not Started | +| Could | [Feature] | [Brief desc] | [Other features] | Not Started | +| Won't | [Feature] | [Brief desc] | - | Deferred | + +Example: +| Priority | Feature | Description | Dependencies | Status | +|----------|---------|-------------|--------------|--------| +| Must | Social Login | Google/GitHub OAuth | User Auth | Not Started | +| Must | Export Results | Download research as Markdown/PDF | Search | Not Started | +| Should | Tagging System | Categorize findings by topic | Research Capture | Not Started | +| Should | Sharing | Share findings with team | User Auth | Not Started | +| Could | Browser Extension | Capture research from browser | API | Not Started | +| Won't | AI Summarization | Auto-summarize long articles | - | Deferred to V2 | + +### Implementation Approach + +**Per Feature**: +1. Review feature requirements and acceptance criteria +2. Update architecture if needed (new ADR) +3. Implement feature in isolated branch +4. Add tests (unit + integration) +5. Update documentation +6. Submit for user approval +7. 
Merge after approval + +**Approval Gates**: User reviews each feature before next begins + +### Success Criteria (Per Feature) + +- [ ] Feature meets acceptance criteria from roadmap +- [ ] Tests added and passing +- [ ] Documentation updated +- [ ] No regression in existing functionality +- [ ] `just validate-branch` passes + +### Dependencies + +- **Requires**: Phase 3 complete (working MVP) +- **Enables**: Production release + +### Blockers & Risks + +[Same table format as Phase 1] + +--- + +## Milestones + +**High-level project checkpoints**: + +| Milestone | Target Date | Criteria | Status | +|-----------|-------------|----------|--------| +| Phase 1 Complete | [YYYY-MM-DD] | Context approved | Not Started | +| Phase 2 Complete | [YYYY-MM-DD] | Architecture approved | Not Started | +| MVP Launch | [YYYY-MM-DD] | All Must-Haves working | Not Started | +| V1.0 Release | [YYYY-MM-DD] | All Should-Haves complete | Not Started | + +--- + +## Technical Milestones + +**Infrastructure and tooling setup**: + +| Milestone | Description | Owner | Status | +|-----------|-------------|-------|--------| +| Dev Environment | Docker compose, local DB | [Name] | Not Started | +| CI/CD Pipeline | GitHub Actions, auto-deploy staging | [Name] | Not Started | +| Monitoring | Logging, error tracking, metrics | [Name] | Not Started | +| Production Deploy | Cloud deployment, custom domain | [Name] | Not Started | + +--- + +## Metrics & KPIs + +**How we measure progress and success**: + +### Development Metrics + +| Metric | Target | Current | Tracking Method | +|--------|--------|---------|-----------------| +| Test Coverage | >70% | 0% | pytest-cov | +| Type Coverage | >90% | 0% | mypy strict mode | +| Build Success Rate | >95% | N/A | GitHub Actions | + +### Product Metrics (Post-Launch) + +| Metric | Target | Current | Tracking Method | +|--------|--------|---------|-----------------| +| Active Users | 20/month | 0 | Analytics dashboard | +| Research Quality | 90% "helpful" | N/A 
| User survey | +| Time Savings | 50% reduction | N/A | Before/after comparison | + +--- + +## Dependencies & Integrations + +**External systems and services**: + +| Dependency | Purpose | Status | Risk | +|------------|---------|--------|------| +| [Service] | [What it's for] | [Integrated/Pending] | High/Med/Low | + +Example: +| Dependency | Purpose | Status | Risk | +|------------|---------|--------|------| +| OpenAI API | LLM for synthesis | Integrated | Low - stable API | +| PostgreSQL | Primary database | Integrated | Low - mature tech | +| OAuth Providers | Social login | Pending | Medium - config complexity | +| AWS S3 | File storage | Not started | Low - well-documented | + +--- + +## Risk Register + +**Ongoing risks and mitigations**: + +| Risk | Probability | Impact | Mitigation | Owner | +|------|-------------|--------|------------|-------| +| [Risk description] | High/Med/Low | High/Med/Low | [How to address] | [Who] | + +Example: +| Risk | Probability | Impact | Mitigation | Owner | +|------|-------------|--------|------------|-------| +| Scope creep in MVP | Medium | High | Strict adherence to Must-Have list, defer to Phase 4 | PM | +| API cost exceeds budget | Low | Medium | Implement caching, rate limiting, cost alerts | Backend Dev | +| Key developer unavailable | Low | High | Documentation, knowledge sharing, pair programming | Team Lead | + +--- + +## Change Log + +**Track significant plan updates**: + +| Date | Change | Reason | Author | +|------|--------|--------|--------| +| [YYYY-MM-DD] | Initial plan created | Project kickoff | [Name] | +| [YYYY-MM-DD] | Added Social Login to Should-Have | User feedback | [Name] | +| [YYYY-MM-DD] | Moved AI Summarization to Won't | Complexity vs. value | [Name] | + +--- + +## Notes for AI Agents + +### Phase Transitions + +**Before starting each phase**: +1. Read all deliverables from previous phase +2. Validate dependencies are met +3. Confirm success criteria are clear and measurable +4. 
Check for blockers and flag to user + +**After completing each phase**: +1. Mark all deliverables as complete +2. Update phase status to "Complete" +3. Prepare handoff summary for next phase +4. Request user approval before proceeding + +### Roadmap Execution (Phase 4) + +**When implementing features from roadmap**: +1. Work on highest priority (Must → Should → Could) +2. Check dependencies (implement prerequisites first) +3. Implement one feature at a time (don't parallelize) +4. Request approval after each feature before next + +**If feature blocked**: +1. Mark status as "Blocked" +2. Document blocker in Blockers & Risks table +3. Move to next unblocked feature +4. Flag to user + +### Updating This Plan + +**When to update**: +- Feature status changes (Not Started → In Progress → Complete) +- New risks identified +- Priorities change +- Blockers encountered +- Milestones reached + +**How to update**: +1. Update relevant table/section +2. Add entry to Change Log +3. Update "Last Updated" date at top +4. Notify user if significant change (scope, timeline) + +--- + +## Quick Reference + +### Current Sprint Focus +**[Brief description of current work]** + +Example: +> Implementing Phase 3 MVP - Focus on Research Capture feature (US-002). Target: Complete by [date]. + +### Next 3 Priorities +1. [Priority 1] +2. [Priority 2] +3. [Priority 3] + +### Blockers Needing Attention +- [Blocker 1] - Owner: [Name] +- [Blocker 2] - Owner: [Name] + +### Upcoming Approvals +- [ ] [Approval 1] - Due: [date] +- [ ] [Approval 2] - Due: [date]