Skip to content

Commit fbc3834

Browse files
jace-ryan and claude committed
test: add 60 tests — API, integration, vector store, adapters, scenarios
5 test modules covering the full stack: - test_api: endpoint contracts, validation, search ranking quality - test_integration: full lifecycle, context passing, error propagation, re-ingest safety, multi-turn chat behavior - test_vector_store: chunking overlap, content preservation, skip rules - test_adapters: BYOM registry, unknown provider rejection, ABC enforcement - test_scenarios: corpus integrity, distribution, MITRE coverage, generator determinism (same seed = identical output) Tests use real ChromaDB with real playbooks (temp dir), mock only LLM. Search ranking tests verify ransomware→ransomware, phishing→phishing. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 520b2b5 commit fbc3834

6 files changed

Lines changed: 765 additions & 0 deletions

File tree

tests/conftest.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
"""Shared test fixtures.
2+
3+
Uses a temporary directory for ChromaDB so tests never touch
4+
the real vector store. Playbooks are ingested once per session.
5+
"""
6+
7+
import atexit
import os
import shutil
import sys
import tempfile

import pytest
12+
13+
# Allow imports from server/
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "server"))

# Point ChromaDB at a temp dir before anything imports config.
# CHROMA_DIR is assigned unconditionally: with setdefault(), a value already
# exported in the caller's shell would win and the tests would run against
# that directory instead of the throwaway one.
_tmp_chroma = tempfile.mkdtemp(prefix="gp_test_chroma_")
os.environ["CHROMA_DIR"] = _tmp_chroma
# mkdtemp() never removes what it creates; drop the directory at interpreter
# exit. ignore_errors keeps a still-open store from turning cleanup into a crash.
atexit.register(shutil.rmtree, _tmp_chroma, ignore_errors=True)
os.environ.setdefault("LLM_PROVIDER", "anthropic")
os.environ.setdefault("ANTHROPIC_API_KEY", "sk-ant-test-fake-key-for-tests")
21+
22+
23+
@pytest.fixture(scope="session")
def ingested_playbooks():
    """Run playbook ingestion once per test session and return its summary dict.

    Fails immediately when the summary reports that no files were ingested.
    """
    # Imported inside the fixture, after the module-level environment setup.
    import vector_store

    summary = vector_store.ingest_playbooks()
    ingested_count = summary["files_ingested"]
    assert ingested_count > 0, "No playbooks found — check playbooks/ dir"
    return summary
31+
32+
33+
@pytest.fixture(scope="session")
def client(ingested_playbooks):
    """Session-wide FastAPI TestClient, created only after playbook ingestion ran."""
    import fastapi.testclient

    import app as server_app

    return fastapi.testclient.TestClient(server_app.app)

tests/test_adapters.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
"""Adapter registry and instantiation tests.
2+
3+
Verifies that the BYOM layer actually wires up correctly —
4+
wrong provider string shouldn't silently give you a default,
5+
and missing API keys should fail before the first LLM call,
6+
not inside it.
7+
"""
8+
9+
import os
10+
import sys
11+
12+
import pytest
13+
14+
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "server"))
15+
16+
17+
def test_get_adapter_returns_anthropic_by_default(monkeypatch):
    """With LLM_PROVIDER=anthropic, get_adapter() yields an AnthropicAdapter.

    config is reloaded so it picks up the patched environment, then reloaded
    again once the patch has been lifted so the patched values do not leak
    into tests that run later in the same session.
    """
    import importlib

    import config

    try:
        with monkeypatch.context() as env:
            env.setenv("LLM_PROVIDER", "anthropic")
            env.setenv("ANTHROPIC_API_KEY", "sk-test")
            # Force reload of config module to pick up new env
            importlib.reload(config)

            from adapters import get_adapter

            adapter = get_adapter()
            assert adapter.model_name  # should have a model name
            assert "AnthropicAdapter" in type(adapter).__name__
    finally:
        # The context manager has restored the environment by now; re-read it.
        importlib.reload(config)
29+
30+
31+
def test_get_adapter_rejects_unknown_provider(monkeypatch):
    """An unrecognised LLM_PROVIDER must raise ValueError rather than fall back.

    config is reloaded under the patched environment and reloaded again after
    the patch is lifted, so the bogus provider does not remain in the config
    module for tests that run afterwards.
    """
    import importlib

    import config

    try:
        with monkeypatch.context() as env:
            env.setenv("LLM_PROVIDER", "gpt-magic-3000")
            importlib.reload(config)

            from adapters import get_adapter

            with pytest.raises(ValueError, match="Unknown LLM_PROVIDER"):
                get_adapter()
    finally:
        # The context manager has restored the environment by now; re-read it.
        importlib.reload(config)
40+
41+
42+
def test_adapter_model_name_matches_config(monkeypatch):
    """The adapter reports the model name supplied through LLM_MODEL.

    config is reloaded under the patched environment and reloaded again after
    the patch is lifted, so the overridden model does not leak into tests that
    run later in the same session.
    """
    import importlib

    import config

    try:
        with monkeypatch.context() as env:
            env.setenv("LLM_PROVIDER", "anthropic")
            env.setenv("LLM_MODEL", "claude-haiku-4-5-20251001")
            env.setenv("ANTHROPIC_API_KEY", "sk-test")
            importlib.reload(config)

            from adapters import get_adapter

            adapter = get_adapter()
            assert adapter.model_name == "claude-haiku-4-5-20251001"
    finally:
        # The context manager has restored the environment by now; re-read it.
        importlib.reload(config)
53+
54+
55+
def test_all_providers_importable():
    """Every adapter module should import without errors,
    even if the SDK isn't installed (that's a runtime error, not import).

    Walks the modules found directly inside the ``adapters`` package and
    imports each one, so a newly added adapter is covered automatically.
    """
    import importlib
    import pkgutil

    import adapters
    from adapters.base import LLMAdapter

    assert LLMAdapter  # ABC is always importable

    for module_info in pkgutil.iter_modules(adapters.__path__):
        # import_module raises on failure, which is exactly the signal we want.
        importlib.import_module(f"adapters.{module_info.name}")
60+
61+
62+
def test_adapter_interface_is_enforced():
    """An LLMAdapter subclass that does not implement complete() cannot be instantiated."""
    from adapters.base import LLMAdapter

    # The subclass is declared outside pytest.raises so that the expected
    # TypeError can only come from the instantiation below.
    class BadAdapter(LLMAdapter):
        pass

    with pytest.raises(TypeError):
        BadAdapter()

tests/test_api.py

Lines changed: 251 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
"""API endpoint tests.
2+
3+
These hit the real FastAPI app with a real ChromaDB backend
4+
(populated with actual playbook content). The only thing mocked
5+
is the LLM — we don't want tests that cost money on every run.
6+
7+
Tests focus on behavior a user would actually encounter:
8+
- Does search rank the right playbook first?
9+
- Does the chat flow work across multiple turns?
10+
- Do validation rules reject garbage input?
11+
- Is ingestion idempotent (no duplicate chunks)?
12+
"""
13+
14+
from unittest.mock import patch
15+
16+
17+
# ---------------------------------------------------------------------------
18+
# Health check
19+
# ---------------------------------------------------------------------------
20+
21+
22+
def test_health_returns_200(client):
    """GET /api/health answers 200, names the service and lists its checks."""
    response = client.get("/api/health")
    assert response.status_code == 200

    payload = response.json()
    assert payload["service"] == "gone-phishing"
    assert "checks" in payload
28+
29+
30+
def test_health_reports_chromadb_status(client):
    """The chromadb health check is ok and counts at least one playbook."""
    response = client.get("/api/health")

    # Index the checks by name so the chromadb entry can be looked up directly.
    checks_by_name = {}
    for check in response.json()["checks"]:
        checks_by_name[check["name"]] = check

    chroma = checks_by_name["chromadb"]
    assert chroma["ok"] is True
    assert chroma["playbooks"] > 0
36+
37+
38+
# ---------------------------------------------------------------------------
39+
# Playbook listing
40+
# ---------------------------------------------------------------------------
41+
42+
43+
def test_list_playbooks_not_empty(client):
    """GET /api/playbooks answers 200 with at least one playbook."""
    response = client.get("/api/playbooks")
    assert response.status_code == 200

    listed = response.json()["playbooks"]
    assert len(listed) > 0
48+
49+
50+
def test_playbooks_include_ransomware(client):
    """The playbook listing contains an entry of type "ransomware"."""
    listing = client.get("/api/playbooks").json()["playbooks"]

    playbook_types = []
    for entry in listing:
        playbook_types.append(entry["playbook_type"])

    assert "ransomware" in playbook_types
55+
56+
57+
def test_playbooks_exclude_readme_and_template(client):
    """Neither "README" nor "full-irp-template" appears as a playbook type."""
    listing = client.get("/api/playbooks").json()["playbooks"]
    playbook_types = [entry["playbook_type"] for entry in listing]

    for skipped in ("README", "full-irp-template"):
        assert skipped not in playbook_types
63+
64+
65+
# ---------------------------------------------------------------------------
66+
# Semantic search
67+
# ---------------------------------------------------------------------------
68+
69+
70+
def test_search_returns_results(client):
    """POST /api/search answers 200 with a non-empty result list."""
    search_body = {"query": "ransomware encrypted files"}
    response = client.post("/api/search", json=search_body)
    assert response.status_code == 200

    hits = response.json()["results"]
    assert len(hits) > 0
75+
76+
77+
def test_search_ransomware_ranks_ransomware_playbook_first(client):
    """A ransomware query should surface the ransomware playbook, not phishing."""
    resp = client.post(
        "/api/search",
        json={"query": "files encrypted with ransom note demanding bitcoin", "n_results": 3},
    )
    # Check the response shape first so a failed request or empty hit list is
    # reported as such instead of as a KeyError/IndexError on the lookup below.
    assert resp.status_code == 200, f"Search request failed: {resp.text}"
    results = resp.json()["results"]
    assert results, "Search returned no results"

    top_type = results[0]["playbook_type"]
    assert top_type == "ransomware", f"Expected ransomware first, got {top_type}"
85+
86+
87+
def test_search_phishing_ranks_phishing_playbook_first(client):
    """A credential-phishing query should rank the phishing playbook first."""
    resp = client.post(
        "/api/search",
        json={"query": "user clicked suspicious email link entered credentials", "n_results": 3},
    )
    # Check the response shape first so a failed request or empty hit list is
    # reported as such instead of as a KeyError/IndexError on the lookup below.
    assert resp.status_code == 200, f"Search request failed: {resp.text}"
    results = resp.json()["results"]
    assert results, "Search returned no results"

    top_type = results[0]["playbook_type"]
    assert top_type == "phishing", f"Expected phishing first, got {top_type}"
94+
95+
96+
def test_search_bec_ranks_bec_playbook_first(client):
    """A wire-fraud / impersonation query should rank the bec playbook first."""
    resp = client.post(
        "/api/search",
        json={"query": "fraudulent wire transfer CEO impersonation", "n_results": 3},
    )
    # Check the response shape first so a failed request or empty hit list is
    # reported as such instead of as a KeyError/IndexError on the lookup below.
    assert resp.status_code == 200, f"Search request failed: {resp.text}"
    results = resp.json()["results"]
    assert results, "Search returned no results"

    top_type = results[0]["playbook_type"]
    assert top_type == "bec", f"Expected bec first, got {top_type}"
103+
104+
105+
def test_search_respects_n_results(client):
    """Asking for n_results=2 yields exactly two results."""
    resp = client.post("/api/search", json={"query": "incident", "n_results": 2})
    # Fail on the status first so an error payload is not reported as a KeyError.
    assert resp.status_code == 200, f"Search request failed: {resp.text}"
    assert len(resp.json()["results"]) == 2
108+
109+
110+
def test_search_relevance_is_between_0_and_1(client):
    """Every returned result carries a relevance score within [0, 1]."""
    resp = client.post("/api/search", json={"query": "malware detected"})
    assert resp.status_code == 200, f"Search request failed: {resp.text}"

    results = resp.json()["results"]
    # Without this guard the loop below passes vacuously on an empty list.
    assert results, "Search returned no results to range-check"
    for r in results:
        assert 0 <= r["relevance"] <= 1, f"Relevance out of range: {r['relevance']}"
114+
115+
116+
# ---------------------------------------------------------------------------
117+
# Ingest
118+
# ---------------------------------------------------------------------------
119+
120+
121+
def test_ingest_is_idempotent(client):
    """Two consecutive POST /api/ingest calls report the same total_chunks."""
    first_run = client.post("/api/ingest")
    second_run = client.post("/api/ingest")

    chunks_after_first = first_run.json()["total_chunks"]
    chunks_after_second = second_run.json()["total_chunks"]
    assert chunks_after_first == chunks_after_second
126+
127+
128+
# ---------------------------------------------------------------------------
129+
# Incident endpoint (LLM mocked)
130+
# ---------------------------------------------------------------------------
131+
132+
133+
def _fake_plan(*args, **kwargs):
    """Stand-in for the action-plan generator; ignores its arguments and returns canned markdown."""
    canned_plan = "## Test Action Plan\n\n- [ ] **IR Lead**: Isolate affected systems"
    return canned_plan
135+
136+
137+
def test_incident_returns_action_plan(client):
    """POST /api/incident answers 200 with an action plan and matched playbooks."""
    incident = {"description": "User clicked phishing link and entered M365 credentials"}

    # The generator is patched where app looks it up, so no real LLM call is made.
    with patch("app.generate_action_plan", side_effect=_fake_plan):
        response = client.post("/api/incident", json=incident)

    assert response.status_code == 200
    payload = response.json()
    for expected_key in ("action_plan", "matched_playbooks"):
        assert expected_key in payload
    assert len(payload["matched_playbooks"]) > 0
148+
149+
150+
def test_incident_returns_top3_matched_playbooks(client):
    """The incident response lists between one and three matched playbooks,
    each carrying a "type" and a "relevance" field."""
    # The generator is patched where app looks it up, so no real LLM call is made.
    with patch("app.generate_action_plan", side_effect=_fake_plan):
        resp = client.post(
            "/api/incident",
            json={"description": "Ransomware encrypted the file server, ransom note on screen"},
        )
    assert resp.status_code == 200, f"Incident request failed: {resp.text}"

    playbooks = resp.json()["matched_playbooks"]
    # Require at least one match: with an empty list the upper bound and the
    # per-item field checks below would all pass without verifying anything.
    assert 0 < len(playbooks) <= 3
    for p in playbooks:
        assert "type" in p
        assert "relevance" in p
161+
162+
163+
def test_incident_rejects_empty_description(client):
    """An empty description is rejected with 422."""
    invalid_body = {"description": ""}
    response = client.post("/api/incident", json=invalid_body)
    assert response.status_code == 422
166+
167+
168+
def test_incident_rejects_too_short_description(client):
    """A two-character description is rejected with 422."""
    invalid_body = {"description": "hi"}
    response = client.post("/api/incident", json=invalid_body)
    assert response.status_code == 422
171+
172+
173+
def test_incident_accepts_optional_severity(client):
    """An incident carrying severity and affected_systems is accepted with 200."""
    incident = {
        "description": "Multiple workstations showing ransom notes",
        "severity": "S1",
        "affected_systems": "DC01, FS01, WS-ACCT-*",
    }

    # The generator is patched where app looks it up, so no real LLM call is made.
    with patch("app.generate_action_plan", side_effect=_fake_plan):
        response = client.post("/api/incident", json=incident)

    assert response.status_code == 200
184+
185+
186+
# ---------------------------------------------------------------------------
187+
# Chat endpoint (LLM mocked)
188+
# ---------------------------------------------------------------------------
189+
190+
191+
def _fake_chat(*args, **kwargs):
    """Stand-in for the chat responder; ignores its arguments and returns a canned reply."""
    canned_reply = "The next step would be to check for lateral movement indicators."
    return canned_reply
193+
194+
195+
def test_chat_returns_response(client):
    """A single-message chat request answers 200 with a "response" field."""
    conversation = [
        {"role": "user", "content": "What should we check next?"},
    ]

    # The responder is patched where app looks it up, so no real LLM call is made.
    with patch("app.chat_response", side_effect=_fake_chat):
        reply = client.post("/api/chat", json={"messages": conversation})

    assert reply.status_code == 200
    assert "response" in reply.json()
207+
208+
209+
def test_chat_handles_multi_turn_conversation(client):
    """A user/assistant/user exchange answers 200 with a non-empty response."""
    conversation = [
        {"role": "user", "content": "A user clicked a phishing link"},
        {"role": "assistant", "content": "Here is the action plan..."},
        {"role": "user", "content": "Should we also check email forwarding rules?"},
    ]

    # The responder is patched where app looks it up, so no real LLM call is made.
    with patch("app.chat_response", side_effect=_fake_chat):
        reply = client.post("/api/chat", json={"messages": conversation})

    assert reply.status_code == 200
    answer = reply.json()["response"]
    assert len(answer) > 0
224+
225+
226+
def test_chat_with_empty_messages_returns_200(client):
    """An empty message list must not crash: either 200 or 422 is accepted."""
    empty_conversation = {"messages": []}

    with patch("app.chat_response", side_effect=_fake_chat):
        reply = client.post("/api/chat", json=empty_conversation)

    # Should not 500 — either returns a response or a validation error
    acceptable_statuses = (200, 422)
    assert reply.status_code in acceptable_statuses
232+
233+
234+
# ---------------------------------------------------------------------------
235+
# Validation edge cases
236+
# ---------------------------------------------------------------------------
237+
238+
239+
def test_search_rejects_single_char_query(client):
    """A one-character query is rejected with 422."""
    invalid_body = {"query": "x"}
    response = client.post("/api/search", json=invalid_body)
    assert response.status_code == 422
242+
243+
244+
def test_search_rejects_n_results_over_20(client):
    """Requesting n_results=50 is rejected with 422."""
    invalid_body = {"query": "ransomware", "n_results": 50}
    response = client.post("/api/search", json=invalid_body)
    assert response.status_code == 422
247+
248+
249+
def test_search_rejects_n_results_zero(client):
    """Requesting n_results=0 is rejected with 422."""
    invalid_body = {"query": "ransomware", "n_results": 0}
    response = client.post("/api/search", json=invalid_body)
    assert response.status_code == 422

0 commit comments

Comments
 (0)