src/
├── benchmarks/ # Benchmark adapters (LoCoMo, LongMemEval, ConvoMem)
├── providers/ # Memory provider integrations (Supermemory, Mem0, Zep)
├── judges/ # LLM-as-judge implementations (OpenAI, Anthropic, Google)
├── orchestrator/ # Pipeline execution and checkpointing
│   └── phases/ # Individual phase runners (ingest, search, answer, evaluate)
├── prompts/ # Default judge prompts by question type
├── types/ # TypeScript interfaces
├── cli/ # CLI commands
├── server/ # Web UI server
└── utils/ # Config, logging, model utilities