|
| 1 | +#!/usr/bin/env python3 |
"""
Health-check script for all MCP servers defined in mcp_server_template.json.
Makes one representative call per server and reports pass/fail.

The server list and API-key requirements are derived automatically from the
template file. The representative calls themselves are hard-coded in
TEST_CALLS below: a server added to the template needs an entry there before
it is exercised (the script prints a warning for every server that has none).

Usage:
    uv run test_servers.py
    uv run test_servers.py --timeout 30
    uv run test_servers.py --concurrency 10
    uv run test_servers.py --server github   # test a single server
"""
| 15 | + |
| 16 | +from __future__ import annotations |
| 17 | + |
import argparse
import asyncio
import json
import re
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import httpx
| 28 | + |
# ── Paths ────────────────────────────────────────────────────────────────────
# Resolve before taking parents: if ``__file__`` is relative (a bare
# ``test_servers.py``), ``Path(__file__).parent`` is ``.`` and has no
# ``parents[1]``, which would raise IndexError at import time.
SCRIPT_DIR = Path(__file__).resolve().parent
# Two directory levels above the directory that contains this script.
REPO_ROOT = SCRIPT_DIR.parents[1]
# Template that lists every MCP server (read by load_servers).
TEMPLATE_PATH = (
    REPO_ROOT / "services/agent-environment/src/agent_environment/mcp_server_template.json"
)
# .env file whose variable names are collected by load_env_keys.
ENV_PATH = REPO_ROOT / ".env"
# Endpoint every test call is POSTed to.
BASE_URL = "http://localhost:1984/call-tool"
| 37 | + |
| 38 | + |
| 39 | +# ── Parse .env ─────────────────────────────────────────────────────────────── |
def load_env_keys(env_path: Path) -> set[str]:
    """Return the names of variables that have a non-empty value in a .env file.

    Blank lines, ``#`` comment lines and lines without ``=`` are skipped.
    A leading ``export `` is dropped from the name, and one pair of matching
    surrounding quotes is removed from the value, so ``KEY=""`` and ``KEY=''``
    count as *not set*.

    Args:
        env_path: Location of the .env file. A missing file yields an empty set.

    Returns:
        The set of variable names whose value is non-empty.
    """
    if not env_path.is_file():
        return set()
    keys: set[str] = set()
    # Only the variable names matter, so undecodable bytes in a value are
    # replaced rather than allowed to abort the whole health check.
    content = env_path.read_text(encoding="utf-8", errors="replace")
    for raw_line in content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        name, _, value = line.partition("=")
        name = name.strip()
        if name.startswith("export "):
            # Shell-style ".env" files often write `export NAME=value`.
            name = name[len("export "):].strip()
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            # Unwrap "..." / '...' so an explicitly empty string is seen as empty.
            value = value[1:-1].strip()
        if name and value:
            keys.add(name)
    return keys
| 53 | + |
| 54 | + |
| 55 | +# ── Load server list from template ─────────────────────────────────────────── |
| 56 | +def _extract_vars(server_cfg: dict) -> list[str]: |
| 57 | + """Return all ${VAR_NAME} references found in a server config.""" |
| 58 | + return re.findall(r"\$\{([A-Z_]+)\}", json.dumps(server_cfg)) |
| 59 | + |
| 60 | + |
def _uses_api_key(server_cfg: dict) -> bool:
    """Tell whether a server config references at least one ``${VAR}`` placeholder."""
    referenced = _extract_vars(server_cfg)
    return len(referenced) > 0
| 63 | + |
| 64 | + |
def load_servers() -> tuple[dict[str, bool], dict[str, list[str]]]:
    """Read the server template and describe each server's env-var needs.

    Returns:
        A pair ``(needs_key, required_vars)``. ``needs_key`` maps each server
        name to whether its config references any ``${VAR}`` placeholder, and
        ``required_vars`` maps each server name to those variable names. Both
        are empty when the template has no ``"mcpServers"`` entry.

    Raises:
        OSError: If the template file cannot be opened.
        json.JSONDecodeError: If the template is not valid JSON.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(TEMPLATE_PATH, encoding="utf-8") as f:
        data = json.load(f)
    servers = data.get("mcpServers", {})
    # Scan each config once and derive both mappings from that single result.
    required_vars = {name: _extract_vars(cfg) for name, cfg in servers.items()}
    needs_key = {name: bool(found) for name, found in required_vars.items()}
    return needs_key, required_vars
| 73 | + |
| 74 | + |
| 75 | +# ── Hardcoded test calls ────────────────────────────────────────────────────── |
| 76 | +# One simple, read-only call per server that exercises real functionality. |
| 77 | +# Key: server name exactly as it appears in mcp_server_template.json |
# Each value is a (tool_name, tool_args) pair. run_test sends the pair as the
# "tool_name" and "tool_args" fields of the JSON payload it POSTs.
# Every tool name below starts with its server name followed by "_".
TEST_CALLS: dict[str, tuple[str, dict]] = {
    # No API key
    "arxiv": (
        "arxiv_search_papers",
        {"query": "machine learning", "max_results": 1},
    ),
    "calculator": (
        "calculator_calculate",
        {"expression": "2 + 2"},
    ),
    "cli-mcp-server": (
        "cli-mcp-server_run_command",
        {"command": "ls /data"},
    ),
    "clinicaltrialsgov-mcp-server": (
        "clinicaltrialsgov-mcp-server_clinicaltrials_list_studies",
        {"query": {"term": "diabetes"}, "pageSize": 1},
    ),
    "context7": (
        "context7_resolve-library-id",
        {"libraryName": "react"},
    ),
    "ddg-search": (
        "ddg-search_search",
        {"query": "python programming"},
    ),
    "desktop-commander": (
        "desktop-commander_list_directory",
        {"path": "/data"},
    ),
    "fetch": (
        "fetch_fetch",
        {"url": "https://httpbin.org/get"},
    ),
    "filesystem": (
        "filesystem_list_allowed_directories",
        {},
    ),
    "git": (
        "git_git_status",
        {"repo_path": "/data/repos/mcp-server-calculator"},
    ),
    "memory": (
        "memory_search_nodes",
        {"query": "test"},
    ),
    "met-museum": (
        "met-museum_get-museum-object",
        {"objectId": 32907},
    ),
    "mcp-code-executor": (
        "mcp-code-executor_execute_code",
        {"code": "print(1 + 1)"},
    ),
    "mcp-server-code-runner": (
        "mcp-server-code-runner_run-code",
        {"languageId": "python", "code": "print(1 + 1)"},
    ),
    "open-library": (
        "open-library_get_book_by_title",
        {"title": "Dune"},
    ),
    "osm-mcp-server": (
        "osm-mcp-server_geocode_address",
        {"address": "New York City"},
    ),
    "pubmed": (
        "pubmed_search_pubmed_key_words",
        {"key_words": "diabetes"},
    ),
    "weather": (
        "weather_find_weather_stations",
        {"location": "48.0993244, -123.4256985"},
    ),
    "whois": (
        "whois_whois_domain",
        {"domain": "example.com"},
    ),
    "wikipedia": (
        "wikipedia_search_wikipedia",
        {"query": "python", "limit": 1},
    ),

    # Needs API key
    "airtable": (
        "airtable_list_bases",
        {},
    ),
    "alchemy": (
        "alchemy_fetchTokenPriceBySymbol",
        {"symbols": ["ETH"]},
    ),
    "brave-search": (
        "brave-search_brave_web_search",
        {"query": "latest AI news"},
    ),
    "e2b-server": (
        "e2b-server_run_code",
        {"code": "print(1 + 1)"},
    ),
    "exa": (
        "exa_web_search_exa",
        {"query": "python programming"},
    ),
    "github": (
        "github_list_commits",
        {"owner": "torvalds", "repo": "subsurface"},
    ),
    "google-maps": (
        "google-maps_maps_geocode",
        {"address": "New York City"},
    ),
    "google-workspace": (
        "google-workspace_list_events",
        {"maxResults": 1},
    ),
    "lara-translate": (
        "lara-translate_translate",
        {"text": [{"text": "Hello world", "translatable": True}], "target": "fr", "source": "en"},
    ),
    "mongodb": (
        "mongodb_list-databases",
        {},
    ),
    "national-parks": (
        "national-parks_findParks",
        {"q": "Yellowstone", "stateCode": "WY"},
    ),
    "notion": (
        "notion_API-get-users",
        {},
    ),
    "oxylabs": (
        "oxylabs_google_search_scraper",
        {"query": "python"},
    ),
    "slack": (
        "slack_channels_list",
        {"channel_types": "public_channel"},
    ),
    "twelvedata": (
        "twelvedata_GetPrice",
        {"params": {"symbol": "AAPL"}},
    ),
    "weather-data": (
        "weather-data_weather_current",
        {"q": "London"},
    ),
}
| 227 | + |
| 228 | + |
| 229 | +# ── Result dataclass ────────────────────────────────────────────────────────── |
@dataclass
class Result:
    """Outcome of one health-check call against a single server."""

    server: str  # server name the call was made for
    needs_key: bool  # whether the server's config references any ${VAR}
    tool: str  # name of the tool that was invoked
    ok: bool  # True when the call counts as a pass
    elapsed: float  # seconds from sending the request to its outcome
    status_code: int = 0  # HTTP status; stays 0 when no response was received
    preview: str = ""  # beginning of the response body, flattened to one line
    error: str = ""  # timeout / exception text; empty when a response arrived
    # Env vars required by the server that were absent in .env. A default
    # factory gives every instance its own list and keeps the annotation true.
    missing_keys: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        # An explicit ``missing_keys=None`` has always been accepted and
        # turned into an empty list; keep that for existing callers.
        if self.missing_keys is None:
            self.missing_keys = []
| 245 | + |
| 246 | + |
| 247 | +# ── Per-request logic ───────────────────────────────────────────────────────── |
def _reports_tool_error(data: Any) -> bool:
    """Return True when a decoded JSON body reports a tool-level failure.

    A list body fails when any dict item has a ``"text"`` string starting with
    ``"Error:"``. A dict body fails when it carries a truthy ``"error"`` or
    ``"isError"`` entry, or when its ``"content"`` list fails the list rule.
    The mere presence of the word "error" somewhere in the payload is NOT a
    failure: a substring test also trips on ``"isError": false`` and on
    ordinary result text.
    """
    # NOTE(review): the exact shape of dict responses from the call-tool
    # endpoint is not visible in this file — confirm these keys against it.
    if isinstance(data, dict):
        if data.get("error") or data.get("isError"):
            return True
        content = data.get("content")
        return isinstance(content, list) and _reports_tool_error(content)
    if isinstance(data, list):
        for item in data:
            if isinstance(item, dict):
                text = item.get("text", "")
                if isinstance(text, str) and text.startswith("Error:"):
                    return True
    return False


async def run_test(
    client: httpx.AsyncClient,
    server: str,
    needs_key: bool,
    tool: str,
    arguments: dict[str, Any],
    timeout: float,
) -> Result:
    """POST one tool call to BASE_URL and summarise the outcome.

    Args:
        client: Shared HTTP client used to send the request.
        server: Server name, copied into the Result.
        needs_key: Whether the server needs env vars, copied into the Result.
        tool: Tool name, sent as ``"tool_name"``.
        arguments: Tool arguments, sent as ``"tool_args"``.
        timeout: Per-request timeout in seconds.

    Returns:
        A Result. ``ok`` is True only for a status code below 300 whose body
        does not report a tool-level error. Timeouts and other exceptions
        raised while sending or inspecting the response become a failed
        Result with ``error`` set instead of propagating.
    """
    payload = {"tool_name": tool, "tool_args": arguments}
    t0 = time.monotonic()
    try:
        resp = await client.post(BASE_URL, json=payload, timeout=timeout)
        elapsed = time.monotonic() - t0
        body = resp.text
        ok = resp.status_code < 300

        # Detect tool-level errors: MCP tools return [{type:text, text:"Error: ..."}]
        if ok:
            try:
                data = resp.json()
            except ValueError:
                # Body is not JSON (JSON decode errors are ValueErrors);
                # the HTTP status alone decides the verdict.
                pass
            else:
                if _reports_tool_error(data):
                    ok = False

        preview = body.replace("\n", " ").strip()[:120]
        return Result(server, needs_key, tool, ok, elapsed,
                      status_code=resp.status_code, preview=preview)
    except httpx.TimeoutException:
        elapsed = time.monotonic() - t0
        return Result(server, needs_key, tool, False, elapsed,
                      error=f"Timed out after {timeout}s")
    except Exception as exc:
        elapsed = time.monotonic() - t0
        # Some exceptions stringify to "", which would leave the report line
        # blank; fall back to the exception's class name.
        return Result(server, needs_key, tool, False, elapsed,
                      error=str(exc) or type(exc).__name__)
| 290 | + |
| 291 | + |
| 292 | +# ── Main ────────────────────────────────────────────────────────────────────── |
async def main(timeout: float, concurrency: int, only_server: str | None) -> None:
    """Run the health checks, print a grouped report and exit non-zero on failure.

    Args:
        timeout: Per-request timeout in seconds.
        concurrency: Maximum number of requests in flight at once (>= 1).
        only_server: When given, test just this server; ``None`` tests all
            servers that have an entry in TEST_CALLS.

    Raises:
        SystemExit: With an explanatory message when ``concurrency`` is below
            1 or ``only_server`` names a server that is not in the template or
            has no test call; with code 1 when any tested server fails.
    """
    if concurrency < 1:
        # Semaphore(0) would block every request forever and a negative value
        # raises ValueError; reject both up front with a clear message.
        raise SystemExit(f"--concurrency must be at least 1 (got {concurrency})")

    servers, required_vars = load_servers()
    env_keys = load_env_keys(ENV_PATH)
    total = len(servers)

    # A misspelled --server used to run zero tests, print "All clear!" and
    # exit 0. Fail loudly instead.
    if only_server is not None:
        if only_server not in servers:
            raise SystemExit(
                f"Unknown server '{only_server}': not defined in {TEMPLATE_PATH.name}"
            )
        if only_server not in TEST_CALLS:
            raise SystemExit(
                f"No test call defined for '{only_server}': add an entry to TEST_CALLS"
            )

    # Warn about any servers in the template that lack a test call
    no_test = [s for s in servers if s not in TEST_CALLS]
    if no_test:
        print(f"\n⚠️ No test call defined for: {', '.join(no_test)}")
        print(" Add entries to TEST_CALLS in this script to cover them.\n")

    # Build the list of tests to run: (server, needs_key, tool, arguments)
    tests = [
        (name, servers[name], *TEST_CALLS[name])
        for name in servers
        if name in TEST_CALLS and (only_server is None or name == only_server)
    ]

    sem = asyncio.Semaphore(concurrency)

    async def bounded(client: httpx.AsyncClient, *args: Any) -> Result:
        # The semaphore caps how many requests are in flight at once.
        async with sem:
            return await run_test(client, *args)

    async with httpx.AsyncClient() as client:
        tasks = [bounded(client, *t, timeout) for t in tests]
        results: list[Result] = await asyncio.gather(*tasks)

    # Annotate failed results with any missing .env keys
    for r in results:
        if not r.ok:
            r.missing_keys = [
                v for v in required_vars.get(r.server, [])
                if v not in env_keys
            ]

    # ── Print results ─────────────────────────────────────────────────────────
    no_key = [r for r in results if not r.needs_key]
    with_key = [r for r in results if r.needs_key]

    def render_group(title: str, group: list[Result]) -> None:
        """Print one titled section, one line per result, sorted by server name."""
        if not group:
            return
        print(f"\n{'━' * 72}")
        print(f" {title}")
        print(f"{'━' * 72}")
        for r in sorted(group, key=lambda x: x.server):
            icon = "✅" if r.ok else "❌"
            timing = f"{r.elapsed:.1f}s"
            if r.ok:
                detail = r.preview[:58]
            elif r.missing_keys:
                # A missing env var is the most likely cause, so show it
                # in place of the raw error text.
                detail = f"not set in .env: {', '.join(r.missing_keys)}"
            else:
                detail = (r.error or r.preview)[:58]
            print(f" {icon} {r.server:<30} {timing:>6} {detail}")

    render_group("No API key required", no_key)
    render_group("API key required", with_key)

    passed = sum(1 for r in results if r.ok)
    tested = len(results)
    failed = [r for r in results if not r.ok]

    print(f"\n{'━' * 72}")
    if only_server:
        print(f" Result: {passed}/{tested} passed (filtered to '{only_server}')", end="")
    else:
        print(f" Result: {passed}/{total} passed ({tested} tested, {total - tested} no test defined)", end="")
    if failed:
        print(f"\n Failed: {', '.join(r.server for r in failed)}")
    else:
        print(" 🎉 All clear!")
    print(f"{'━' * 72}\n")

    if failed:
        raise SystemExit(1)
| 370 | + |
| 371 | + |
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the async main().
    cli = argparse.ArgumentParser(description="Test all MCP servers from the template")
    cli.add_argument(
        "--timeout",
        type=float,
        default=30,
        help="Per-request timeout in seconds (default: 30)",
    )
    cli.add_argument(
        "--concurrency",
        type=int,
        default=8,
        help="Max parallel requests (default: 8)",
    )
    cli.add_argument(
        "--server",
        metavar="NAME",
        help="Test only this server (e.g. --server github)",
    )
    options = cli.parse_args()

    asyncio.run(
        main(
            timeout=options.timeout,
            concurrency=options.concurrency,
            only_server=options.server,
        )
    )
0 commit comments