From 35d7d016590b85cf4f236b3a321a254e1c950eb3 Mon Sep 17 00:00:00 2001
From: stacknil <stacknil@proton.me>
Date: Thu, 21 May 2026 15:38:17 +0800
Subject: [PATCH 1/2] Add reviewer brief

---
 docs/reviewer-brief.md | 66 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 docs/reviewer-brief.md

diff --git a/docs/reviewer-brief.md b/docs/reviewer-brief.md
new file mode 100644
index 0000000..649906a
--- /dev/null
+++ b/docs/reviewer-brief.md
@@ -0,0 +1,66 @@
+# Reviewer brief
+
+## Problem
+
+Telemetry and detection projects often look impressive in screenshots but are hard to review end to end. Reviewers need a narrow, reproducible path from raw events to outputs without a production stack or opaque AI behavior.
+
+## What it does
+
+`telemetry-lab` is a local, file-based portfolio repo with four demos:
+
+- `telemetry-window-demo` for sliding-window features and rule-based alerts
+- `ai-assisted-detection-demo` for deterministic case grouping plus bounded JSON-only LLM drafting
+- `rule-evaluation-and-dedup-demo` for cooldown and suppression reasoning
+- `config-change-investigation-demo` for risky-change evidence correlation
+
+## Quick run
+
+```bash
+python -m pip install -e ".[dev]"
+python -m telemetry_window_demo.cli run --config configs/default.yaml
+python -m telemetry_window_demo.cli run-rule-dedup-demo
+python -m telemetry_window_demo.cli run-config-change-demo
+python -m telemetry_window_demo.cli run-ai-demo
+```
+
+## Sample output
+
+The default `run --config configs/default.yaml` path regenerates:
+
+- `data/processed/features.csv`
+- `data/processed/alerts.csv`
+- `data/processed/summary.json`
+- three PNG timelines under `data/processed/`
+
+The current committed default sample reports:
+
+- `41` normalized events
+- `24` windows
+- `12` alerts after a `60` second cooldown
+
+The other demos emit reviewer-facing artifacts such as `dedup_report.md`, `investigation_report.md`, and `case_report.md`.
+
+## What this proves
+
+- telemetry normalization and windowed feature design
+- alert logic that stays reviewable instead of disappearing into scoring
+- bounded, explicitly non-autonomous AI use
+- reviewer-friendly artifact generation across multiple demo shapes
+
+## Safety / boundaries
+
+- local sample-data workflows only
+- no real-time ingestion or autonomous response
+- no final incident verdicts from the AI-assisted demo
+- public review focus, not production deployment claims
+
+## Limitations
+
+- no alert routing, dashboarding, or case management
+- sample-data driven, not connected to live systems
+- no streaming state management
+- intentionally small-scope demos rather than a unified monitoring platform
+
+## Next milestone
+
+Add another compact investigation path that strengthens the bridge from telemetry features to analyst-facing monitoring and detection workflows.

From 086f5bf47552525fa0dc3030b1bca80b60e76d43 Mon Sep 17 00:00:00 2001
From: stacknil <stacknil@proton.me>
Date: Thu, 21 May 2026 22:47:28 +0800
Subject: [PATCH 2/2] docs(review): strengthen demo coherence

---
 .gitignore                                    |   1 +
 CONTRIBUTING.md                               |   2 +-
 README.md                                     |  44 +++--
 demos/ai-assisted-detection-demo/README.md    |  59 ++++--
 .../artifacts/case_report.md                  | 170 +++++++++---------
 .../README.md                                 |  11 ++
 .../rule-evaluation-and-dedup-demo/README.md  |  10 ++
 docs/design-notes.md                          |   2 +-
 docs/reviewer-brief.md                        |   8 +
 docs/reviewer-path.md                         |  39 ++++
 .../ai_assisted_detection_demo/pipeline.py    | 109 ++++++-----
 .../pipeline.py                               |   9 +-
 .../pipeline.py                               |   9 +-
 src/telemetry_window_demo/windowing.py        |   3 +
 tests/test_ai_assisted_detection_demo.py      |  52 ++++--
 tests/test_cli_subprocess.py                  | 111 ++++++++++++
 .../test_config_change_investigation_demo.py  |   8 +
 tests/test_rule_evaluation_and_dedup_demo.py  |  10 ++
 tests/test_windowing.py                       |  30 ++++
 19 files changed, 509 insertions(+), 178 deletions(-)
 create mode 100644 docs/reviewer-path.md
 create mode 100644 tests/test_cli_subprocess.py

diff --git a/.gitignore b/.gitignore
index b3b3f0a..e45129c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,7 @@ __pycache__/
 *.py[cod]
 *.egg-info/
 .pytest_cache/
+.pytest-artifacts*/
 .coverage
 .venv/
 venv/
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 554540b..7b90aa6 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -17,7 +17,7 @@ Thanks for the interest. This is a solo-maintainer portfolio repository, so the
 - run:
 
   ```bash
-  python -m pip install -e .
+  python -m pip install -e ".[dev]"
   pytest
   ```
 
diff --git a/README.md b/README.md
index e1cdfea..8261140 100644
--- a/README.md
+++ b/README.md
@@ -2,7 +2,7 @@
 
 [![CI](https://github.com/stacknil/telemetry-lab/actions/workflows/ci.yml/badge.svg)](https://github.com/stacknil/telemetry-lab/actions/workflows/ci.yml)
 
-Small portfolio prototypes for telemetry analytics, monitoring, and detection-oriented signal processing.
+A local, file-based detection workflow lab for reviewer-verifiable telemetry and detection demos.
 
 Latest milestone: [v0.6.0 — fourth demo and config-change investigation](https://github.com/stacknil/telemetry-lab/releases/latest).
 
@@ -22,7 +22,7 @@ Latest milestone: [v0.6.0 — fourth demo and config-change investigation](https
 
 ## What This Repo Is
 
-`telemetry-lab` is a small portfolio repository for telemetry analytics and constrained detection-oriented workflows. It is organized as four local, file-based demos that are reproducible from committed sample data and intentionally scoped for public review rather than production use.
+`telemetry-lab` is a small portfolio repository for constrained detection workflows. It is not a SIEM, dashboard, or monitoring platform; it is organized as four local, file-based demos that are reproducible from committed sample data and intentionally scoped for public review rather than production use.
 
 ### telemetry-window-demo
 
@@ -47,6 +47,9 @@ python -m pip install -e .
 python -m telemetry_window_demo.cli run --config configs/default.yaml
 ```
 
+Use the same Python interpreter for install, tests, and demo commands. On machines with multiple Python installs, replace `python` with the intended interpreter path.
+To run the test suite in a fresh environment, install the dev extra with `python -m pip install -e ".[dev]"`.
+
 Other demo entrypoints:
 
 - `python -m telemetry_window_demo.cli run-ai-demo`
@@ -56,9 +59,10 @@ Other demo entrypoints:
 Useful inspection commands:
 
 - `python -m telemetry_window_demo.cli summarize --input data/raw/sample_events.jsonl`
-- `python -m telemetry_window_demo.cli summarize --input events.csv --timestamp-col event_time`
 
-That command reads `data/raw/sample_events.jsonl` and regenerates:
+For CSV inputs, pass a `.csv` file to `--input`; use `--timestamp-col` when the timestamp column is not named `timestamp`.
+
+The `run --config configs/default.yaml` command reads `data/raw/sample_events.jsonl` and regenerates:
 
 - `data/processed/features.csv`
 - `data/processed/alerts.csv`
@@ -73,17 +77,26 @@ With the bundled default sample, the current repo state produces:
 - `24` windows
 - `12` alerts after a `60` second cooldown
 
-Why it is worth a quick look:
-
-- it shows a full telemetry path from raw events to operator-facing outputs
-- the sample inputs and outputs are reproducible in-repo
-- a second bundled scenario gives a slightly richer walkthrough without changing the basic CLI flow
-
-![Default alert timeline](data/processed/alerts_timeline.png)
-
-## Demo Variants
-
-Default sample:
+Why it is worth a quick look:
+
+- it shows a full telemetry path from raw events to operator-facing outputs
+- the sample inputs and outputs are reproducible in-repo
+- a second bundled scenario gives a slightly richer walkthrough without changing the basic CLI flow
+
+![Default alert timeline](data/processed/alerts_timeline.png)
+
+## Reviewer Path
+
+For a quick coherence pass across the demos:
+
+1. Run `python -m telemetry_window_demo.cli run --config configs/default.yaml` and confirm `data/processed/summary.json` reports `41` events, `24` windows, and `12` alerts.
+2. Run `python -m telemetry_window_demo.cli run-rule-dedup-demo` and confirm `demos/rule-evaluation-and-dedup-demo/artifacts/dedup_report.md` shows `10` raw hits reduced to `6` retained alerts with `4` suppressions.
+3. Run `python -m telemetry_window_demo.cli run-config-change-demo` and confirm `demos/config-change-investigation-demo/artifacts/investigation_report.md` shows `4` normalized changes, `3` risky changes, and `3` investigations.
+4. Run `python -m telemetry_window_demo.cli run-ai-demo` and confirm `demos/ai-assisted-detection-demo/artifacts/case_report.md` shows `3` deterministic cases with human verification and no final incident verdict.
+
+## Demo Variants
+
+Default sample:
 
 - config: [`configs/default.yaml`](configs/default.yaml)
 - input: `data/raw/sample_events.jsonl`
@@ -129,6 +142,7 @@ Cooldown behavior:
 
 - [`demos/rule-evaluation-and-dedup-demo/README.md`](demos/rule-evaluation-and-dedup-demo/README.md) explains the third demo and links its committed before/after dedup artifacts
 - [`demos/config-change-investigation-demo/README.md`](demos/config-change-investigation-demo/README.md) explains the config-change investigation demo and its committed artifacts
+- [`docs/reviewer-path.md`](docs/reviewer-path.md) maps common review questions to the right demo and artifacts
 - [`docs/sample-output.md`](docs/sample-output.md) summarizes the committed sample artifacts
 - [`docs/roadmap.md`](docs/roadmap.md) sketches the next demo directions
 - [`data/processed/summary.json`](data/processed/summary.json) captures the default run in machine-readable form
diff --git a/demos/ai-assisted-detection-demo/README.md b/demos/ai-assisted-detection-demo/README.md
index 24b9c6e..e8b7562 100644
--- a/demos/ai-assisted-detection-demo/README.md
+++ b/demos/ai-assisted-detection-demo/README.md
@@ -65,23 +65,32 @@ Generated artifacts are written to `demos/ai-assisted-detection-demo/artifacts/`
 - deterministic rules: `config/rules.yaml`
 - structured output schema: `config/llm_case_output_schema.json`
 
-## Expected artifacts
-
-- `artifacts/rule_hits.json`
-- `artifacts/case_bundles.json`
-- `artifacts/case_summaries.json`
-- `artifacts/case_report.md`
-- `artifacts/audit_traces.jsonl`
-
-The bundled sample data is designed to produce at least three generated cases.
-
-## Artifact semantics
+## Expected artifacts
+
+- `artifacts/rule_hits.json`
+- `artifacts/case_bundles.json`
+- `artifacts/case_summaries.json`
+- `artifacts/case_report.md`
+- `artifacts/audit_traces.jsonl`
+
+## Expected run summary
+
+The bundled sample run should report:
+
+- `15` raw events normalized into `15` internal events
+- `5` deterministic rule hits
+- `3` grouped cases
+- `3` accepted JSON summaries
+- `0` rejected summaries in the default accepted path
+- `3` audit records
+
+## Artifact semantics
 
 - `rule_hits.json`: deterministic rule hits with rule metadata, ATT&CK mapping, entities, and evidence highlights
 - `case_bundles.json`: grouped cases with severity, rule hits, ATT&CK mappings, raw evidence, and untrusted-data marking
 - `case_summaries.json`: only accepted JSON summaries that passed schema and semantic validation
-- `case_report.md`: analyst-facing report that shows accepted summaries and explicitly notes rejected case summaries
-- `case_report.md`: includes a top-level run integrity section that surfaces rule/config degradation
+- `case_report.md`: analyst-facing report with run counts, accepted summaries, and explicit notes for rejected case summaries
+- `case_report.md`: includes a top-level run integrity section that surfaces rule/config degradation
 - `audit_traces.jsonl`: stable per-record audit log for accepted and rejected paths, using `schema_version = ai-assisted-detection-audit/v1` and including `ts`, `case_id`, `validation_status`, `rejection_reason`, `rule_ids`, `prompt_input_digest`, `evidence_digest`, and bounded response excerpts
 
 ## Rejection behavior
@@ -103,15 +112,27 @@ Use the default sample run artifacts in `artifacts/case_summaries.json`, `artifa
 
 Verify that `CASE-001` appears in all three places, that the `case_id` matches exactly, that `human_verification` is `required`, and that the audit record shows `validation_status = accepted` with `schema_version = ai-assisted-detection-audit/v1`.
 
-### Rejected summary path
-
-Run `pytest tests/test_ai_assisted_detection_demo.py -k "audit_traces_capture_accepted_and_rejected_paths or case_id_mismatch"` and inspect the `case_report.md`, `case_summaries.json`, and `audit_traces.jsonl` artifacts written by the test.
+### Rejected summary path
+
+Run:
+
+```bash
+pytest tests/test_ai_assisted_detection_demo.py -k "audit_traces_capture_accepted_and_rejected_paths or case_id_mismatch" --basetemp .pytest-artifacts-ai-demo-rejections
+```
+
+Then inspect the `case_report.md`, `case_summaries.json`, and `audit_traces.jsonl` files under `.pytest-artifacts-ai-demo-rejections/test_*/artifacts/`.
 
 Verify that the rejected case is absent from `case_summaries.json`, appears in `case_report.md` as `Summary status: rejected`, and has an audit record with `validation_status = rejected` plus a concrete `rejection_reason` such as `missing_required_fields`, `semantic_validation_failed`, or `case_id_mismatch`.
 
-### Degraded coverage path
-
-Run `pytest tests/test_ai_assisted_detection_demo.py -k malformed_attack_metadata_is_rejected_and_recorded` and inspect the generated `case_report.md` and `audit_traces.jsonl`.
+### Degraded coverage path
+
+Run:
+
+```bash
+pytest tests/test_ai_assisted_detection_demo.py -k malformed_attack_metadata_is_rejected_and_recorded --basetemp .pytest-artifacts-ai-demo-degraded
+```
+
+Then inspect the generated `case_report.md` and `audit_traces.jsonl` files under `.pytest-artifacts-ai-demo-degraded/test_*/artifacts/`.
 
 Verify that `case_report.md` exposes `## Run Integrity`, `coverage_degraded: yes`, and the rejected rule id, and that `audit_traces.jsonl` contains a global rejection record with `case_id = null` and `rejection_reason = rule_metadata_validation_failed`.
 
diff --git a/demos/ai-assisted-detection-demo/artifacts/case_report.md b/demos/ai-assisted-detection-demo/artifacts/case_report.md
index 3338ff7..845917d 100644
--- a/demos/ai-assisted-detection-demo/artifacts/case_report.md
+++ b/demos/ai-assisted-detection-demo/artifacts/case_report.md
@@ -1,80 +1,90 @@
-# AI-Assisted Detection Demo Report
-
-This report is analyst-facing draft output from a constrained case summarization pipeline.
-Detections and grouping are deterministic. The LLM is limited to structured summarization only.
-Human verification is required. No automated response actions or final incident verdicts are produced.
-
-## Run Integrity
-
-- accepted_rules: AUTH-001, AUTH-002, PROC-001, WEB-001
-- rejected_rules: none
-- coverage_degraded: no
-- rejection_reasons: none
-
-## CASE-001
-
-- Severity: high
-- First seen: 2026-03-27T09:01:55Z
-- Last seen: 2026-03-27T09:02:20Z
-- Rule hits: repeated_failed_logins, successful_login_after_failures
-- ATT&CK: T1110, T1078
-
-Summary: CASE-001 contains 2 deterministic rule hits covering repeated_failed_logins, successful_login_after_failures for principal ops_admin; src_ip 198.51.100.24; host vpn-gw-01 during 2026-03-27T09:01:55Z to 2026-03-27T09:02:20Z. The case warrants analyst review but does not imply a final incident decision.
-
-Likely causes:
-- Repeated password guessing or credential stuffing against the targeted account.
-- A valid credential may have been used after several failed login attempts.
-
-Uncertainty notes:
-- Telemetry is limited to the bundled sample evidence and does not confirm operator intent.
-- The case summary is advisory only and requires human review before any incident classification.
-
-Suggested next steps:
-- Review the raw evidence and confirm whether the activity aligns with an approved administrative task.
-- Check authentication context for MFA state, prior successful logins, and expected source locations.
-- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict.
-
-## CASE-002
-
-- Severity: medium
-- First seen: 2026-03-27T09:11:10Z
-- Last seen: 2026-03-27T09:11:10Z
-- Rule hits: sensitive_path_scan
-- ATT&CK: T1595
-
-Summary: CASE-002 contains 1 deterministic rule hits covering sensitive_path_scan for src_ip 203.0.113.77; host portal-01 during 2026-03-27T09:11:10Z to 2026-03-27T09:11:10Z. The case warrants analyst review but does not imply a final incident decision.
-
-Likely causes:
-- The source IP appears to be probing sensitive web paths on the exposed application.
-
-Uncertainty notes:
-- Telemetry is limited to the bundled sample evidence and does not confirm operator intent.
-- The case summary is advisory only and requires human review before any incident classification.
-- Prompt-like text appeared in telemetry and was treated strictly as untrusted evidence.
-
-Suggested next steps:
-- Review the raw evidence and confirm whether the activity aligns with an approved administrative task.
-- Compare the web requests with reverse-proxy and WAF logs to determine whether the probing continued.
-- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict.
-
-## CASE-003
-
-- Severity: high
-- First seen: 2026-03-27T09:20:00Z
-- Last seen: 2026-03-27T09:20:20Z
-- Rule hits: encoded_powershell_execution, encoded_powershell_execution
-- ATT&CK: T1059.001
-
-Summary: CASE-003 contains 2 deterministic rule hits covering encoded_powershell_execution for principal lab_user; host wkstn-07 during 2026-03-27T09:20:00Z to 2026-03-27T09:20:20Z. The case warrants analyst review but does not imply a final incident decision.
-
-Likely causes:
-- Obfuscated PowerShell execution may reflect manual tradecraft or an unsafe script.
-
-Uncertainty notes:
-- Telemetry is limited to the bundled sample evidence and does not confirm operator intent.
-- The case summary is advisory only and requires human review before any incident classification.
-
-Suggested next steps:
-- Review the raw evidence and confirm whether the activity aligns with an approved administrative task.
-- Inspect the originating host timeline and validate whether the encoded PowerShell command matches known tooling.
-- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict.
+# AI-Assisted Detection Demo Report
+
+This report is analyst-facing draft output from a constrained case summarization pipeline.
+Detections and grouping are deterministic. The LLM is limited to structured summarization only.
+Human verification is required. No automated response actions or final incident verdicts are produced.
+
+## Run Summary
+
+- raw_events: 15
+- normalized_events: 15
+- rule_hits: 5
+- cases: 3
+- accepted_summaries: 3
+- rejected_summaries: 0
+- audit_records: 3
+
+## Run Integrity
+
+- accepted_rules: AUTH-001, AUTH-002, PROC-001, WEB-001
+- rejected_rules: none
+- coverage_degraded: no
+- rejection_reasons: none
+
+## CASE-001
+
+- Severity: high
+- First seen: 2026-03-27T09:01:55Z
+- Last seen: 2026-03-27T09:02:20Z
+- Rule hits: repeated_failed_logins, successful_login_after_failures
+- ATT&CK: T1110, T1078
+
+Summary: CASE-001 contains 2 deterministic rule hits covering repeated_failed_logins, successful_login_after_failures for principal ops_admin; src_ip 198.51.100.24; host vpn-gw-01 during 2026-03-27T09:01:55Z to 2026-03-27T09:02:20Z. The case warrants analyst review but does not imply a final incident decision.
+
+Likely causes:
+- Repeated password guessing or credential stuffing against the targeted account.
+- A valid credential may have been used after several failed login attempts.
+
+Uncertainty notes:
+- Telemetry is limited to the bundled sample evidence and does not confirm operator intent.
+- The case summary is advisory only and requires human review before any incident classification.
+
+Suggested next steps:
+- Review the raw evidence and confirm whether the activity aligns with an approved administrative task.
+- Check authentication context for MFA state, prior successful logins, and expected source locations.
+- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict.
+
+## CASE-002
+
+- Severity: medium
+- First seen: 2026-03-27T09:11:10Z
+- Last seen: 2026-03-27T09:11:10Z
+- Rule hits: sensitive_path_scan
+- ATT&CK: T1595
+
+Summary: CASE-002 contains 1 deterministic rule hits covering sensitive_path_scan for src_ip 203.0.113.77; host portal-01 during 2026-03-27T09:11:10Z to 2026-03-27T09:11:10Z. The case warrants analyst review but does not imply a final incident decision.
+
+Likely causes:
+- The source IP appears to be probing sensitive web paths on the exposed application.
+
+Uncertainty notes:
+- Telemetry is limited to the bundled sample evidence and does not confirm operator intent.
+- The case summary is advisory only and requires human review before any incident classification.
+- Prompt-like text appeared in telemetry and was treated strictly as untrusted evidence.
+
+Suggested next steps:
+- Review the raw evidence and confirm whether the activity aligns with an approved administrative task.
+- Compare the web requests with reverse-proxy and WAF logs to determine whether the probing continued.
+- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict.
+
+## CASE-003
+
+- Severity: high
+- First seen: 2026-03-27T09:20:00Z
+- Last seen: 2026-03-27T09:20:20Z
+- Rule hits: encoded_powershell_execution, encoded_powershell_execution
+- ATT&CK: T1059.001
+
+Summary: CASE-003 contains 2 deterministic rule hits covering encoded_powershell_execution for principal lab_user; host wkstn-07 during 2026-03-27T09:20:00Z to 2026-03-27T09:20:20Z. The case warrants analyst review but does not imply a final incident decision.
+
+Likely causes:
+- Obfuscated PowerShell execution may reflect manual tradecraft or an unsafe script.
+
+Uncertainty notes:
+- Telemetry is limited to the bundled sample evidence and does not confirm operator intent.
+- The case summary is advisory only and requires human review before any incident classification.
+
+Suggested next steps:
+- Review the raw evidence and confirm whether the activity aligns with an approved administrative task.
+- Inspect the originating host timeline and validate whether the encoded PowerShell command matches known tooling.
+- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict.
diff --git a/demos/config-change-investigation-demo/README.md b/demos/config-change-investigation-demo/README.md
index ed2ba4f..0a86235 100644
--- a/demos/config-change-investigation-demo/README.md
+++ b/demos/config-change-investigation-demo/README.md
@@ -58,6 +58,17 @@ Evidence is attached only when:
 - `artifacts/investigation_summary.json`
 - `artifacts/investigation_report.md`
 
+## Expected Run Summary
+
+The bundled sample run should report:
+
+- `4` normalized configuration changes
+- `3` risky changes
+- `3` investigations
+- `15` minute correlation window
+- `1` benign change that remains normalized but does not become an investigation
+- `1` risky break-glass investigation with no nearby supporting evidence
+
 ## Artifact Semantics
 
 - `change_events_normalized.json`: normalized config changes before any rule match is applied
diff --git a/demos/rule-evaluation-and-dedup-demo/README.md b/demos/rule-evaluation-and-dedup-demo/README.md
index ddbb90e..71fc80d 100644
--- a/demos/rule-evaluation-and-dedup-demo/README.md
+++ b/demos/rule-evaluation-and-dedup-demo/README.md
@@ -59,6 +59,16 @@ That means repeated hits for the same rule can still be kept separately when the
 - `artifacts/dedup_explanations.json`
 - `artifacts/dedup_report.md`
 
+## Expected Run Summary
+
+The bundled sample run should report:
+
+- `10` raw rule hits
+- `6` retained alerts
+- `4` suppressed repeated hits
+- `4` rule/scope groups
+- `180` second cooldown
+
 ## Artifact Semantics
 
 - `rule_hits_before_dedup.json`: normalized raw hits with resolved cooldown scope and cooldown key
diff --git a/docs/design-notes.md b/docs/design-notes.md
index d8d07f0..20bfd83 100644
--- a/docs/design-notes.md
+++ b/docs/design-notes.md
@@ -23,7 +23,7 @@ The implementation follows a narrow pipeline:
 - `pandas` is used for clarity and concise feature computation.
 - Rules stay threshold-based to emphasize detection semantics over model complexity.
 - Relative config paths are resolved from the repository root when the config lives under `configs/`.
-- Outputs are CSV and PNG because they are easy to inspect, diff, and embed in README material.
+- Outputs are CSV, JSON, and PNG: tables and summaries stay easy to diff, while PNG timelines are easy to inspect and embed in README material.
 - The notebook remains intentionally tiny so the packaged CLI pipeline stays the primary entrypoint.
 
 ## Non-goals
diff --git a/docs/reviewer-brief.md b/docs/reviewer-brief.md
index 649906a..42d3b9d 100644
--- a/docs/reviewer-brief.md
+++ b/docs/reviewer-brief.md
@@ -13,6 +13,14 @@ Telemetry and detection projects often look impressive in screenshots but are ha
 - `rule-evaluation-and-dedup-demo` for cooldown and suppression reasoning
 - `config-change-investigation-demo` for risky-change evidence correlation
 
+## Reviewer Evidence
+
+- Reproducible command: `python -m telemetry_window_demo.cli run --config configs/default.yaml`
+- Deterministic outputs: feature tables, alert tables, `summary.json`, PNG timelines, dedup reports, investigation reports, and bounded AI case reports.
+- Tests / CI: pytest coverage for windowing, CLI behavior, demo pipelines, artifact validation, and deterministic guardrails; GitHub Actions CI is enabled.
+- Release evidence: reviewer packs and release notes through the current `v0.6.0` milestone.
+- Non-goals: production monitoring, real-time ingestion, alert routing, autonomous response, dashboards, or final incident verdicts.
+
 ## Quick run
 
 ```bash
diff --git a/docs/reviewer-path.md b/docs/reviewer-path.md
new file mode 100644
index 0000000..a1bf3f3
--- /dev/null
+++ b/docs/reviewer-path.md
@@ -0,0 +1,39 @@
+# Reviewer Path
+
+`telemetry-lab` is a controlled detection workflow portfolio. It is not a SIEM or a dashboard, and it should not be read as an unfinished monitoring platform.
+
+The repo is intentionally local and file-based so reviewers can verify each workflow from committed sample inputs to generated artifacts without live infrastructure, alert routing, or autonomous response behavior.
+
+## Choose a demo by review question
+
+| Question | Demo | What to inspect |
+| --- | --- | --- |
+| How are raw events converted to alert features? | `telemetry-window-demo` | `data/processed/features.csv`, `data/processed/alerts.csv`, `data/processed/summary.json` |
+| How is AI constrained? | `ai-assisted-detection-demo` | `demos/ai-assisted-detection-demo/artifacts/case_summaries.json`, `demos/ai-assisted-detection-demo/artifacts/audit_traces.jsonl`, guardrails in `demos/ai-assisted-detection-demo/README.md` |
+| How are duplicate alerts reduced? | `rule-evaluation-and-dedup-demo` | `demos/rule-evaluation-and-dedup-demo/artifacts/rule_hits_before_dedup.json`, `demos/rule-evaluation-and-dedup-demo/artifacts/rule_hits_after_dedup.json`, `demos/rule-evaluation-and-dedup-demo/artifacts/dedup_explanations.json` |
+| How are risky config changes investigated? | `config-change-investigation-demo` | `demos/config-change-investigation-demo/artifacts/investigation_hits.json`, `demos/config-change-investigation-demo/artifacts/investigation_report.md` |
+
+## Fast verification commands
+
+Run the commands below from the repository root.
+
+Use the same Python interpreter for install, tests, and demo commands. On machines with multiple Python installs, replace `python` with the intended interpreter path.
+
+```bash
+python -m pip install -e ".[dev]"
+python -m telemetry_window_demo.cli run --config configs/default.yaml
+python -m telemetry_window_demo.cli run-ai-demo
+python -m telemetry_window_demo.cli run-rule-dedup-demo
+python -m telemetry_window_demo.cli run-config-change-demo
+pytest
+```
+
+## Expected boundaries
+
+- No production monitoring claims
+- No real-time ingestion or streaming state
+- No alert routing, dashboard, or case-management service
+- No autonomous response actions
+- No final incident verdicts from the AI-assisted demo
+
+The reviewer value is the workflow evidence: deterministic inputs, visible intermediate artifacts, constrained summaries, and reports that make detection behavior inspectable.
diff --git a/src/telemetry_window_demo/ai_assisted_detection_demo/pipeline.py b/src/telemetry_window_demo/ai_assisted_detection_demo/pipeline.py
index 265a7eb..d5c4d90 100644
--- a/src/telemetry_window_demo/ai_assisted_detection_demo/pipeline.py
+++ b/src/telemetry_window_demo/ai_assisted_detection_demo/pipeline.py
@@ -9,9 +9,10 @@
 from pathlib import Path
 from typing import Any
 
-import yaml
-
-from ..time_utils import parse_utc_timestamp
+import yaml
+
+from ..io import ensure_output_directory, ensure_output_file_path
+from ..time_utils import parse_utc_timestamp
 from .llm import DemoStructuredCaseLlm
 
 SEVERITY_ORDER = {"low": 1, "medium": 2, "high": 3, "critical": 4}
@@ -100,9 +101,9 @@ def run_demo(
     artifacts_dir: Path | None = None,
     llm: Any | None = None,
 ) -> dict[str, Any]:
-    demo_root = Path(demo_root or default_demo_root()).resolve()
-    artifacts_dir = Path(artifacts_dir or demo_root / "artifacts").resolve()
-    artifacts_dir.mkdir(parents=True, exist_ok=True)
+    demo_root = Path(demo_root or default_demo_root()).resolve()
+    artifacts_dir = Path(artifacts_dir or demo_root / "artifacts").resolve()
+    ensure_output_directory(artifacts_dir)
 
     raw_events = load_jsonl(demo_root / "data" / "raw" / "sample_security_events.jsonl")
     rules_config = load_yaml(demo_root / "config" / "rules.yaml")
@@ -241,21 +242,30 @@ def run_demo(
             )
         )
 
-    paths = {
-        "rule_hits": write_json(rule_hits, artifacts_dir / "rule_hits.json"),
-        "case_bundles": write_json(case_bundles, artifacts_dir / "case_bundles.json"),
-        "case_summaries": write_json(case_summaries, artifacts_dir / "case_summaries.json"),
-        "case_report": write_text(
-            build_case_report(
-                case_bundles,
-                case_summaries,
-                audit_records,
-                accepted_rule_ids=accepted_rule_ids,
-            ),
-            artifacts_dir / "case_report.md",
-        ),
-        "audit_traces": write_jsonl(audit_records, artifacts_dir / "audit_traces.jsonl"),
-    }
+    paths = {
+        "rule_hits": write_json(rule_hits, artifacts_dir / "rule_hits.json"),
+        "case_bundles": write_json(case_bundles, artifacts_dir / "case_bundles.json"),
+        "case_summaries": write_json(case_summaries, artifacts_dir / "case_summaries.json"),
+        "case_report": write_text(
+            build_case_report(
+                case_bundles,
+                case_summaries,
+                audit_records,
+                accepted_rule_ids=accepted_rule_ids,
+                run_summary={
+                    "raw_events": len(raw_events),
+                    "normalized_events": len(normalized_events),
+                    "rule_hits": len(rule_hits),
+                    "cases": len(case_bundles),
+                    "accepted_summaries": len(case_summaries),
+                    "rejected_summaries": rejected_summary_count,
+                    "audit_records": len(audit_records),
+                },
+            ),
+            artifacts_dir / "case_report.md",
+        ),
+        "audit_traces": write_jsonl(audit_records, artifacts_dir / "audit_traces.jsonl"),
+    }
 
     return {
         "demo_root": demo_root,
@@ -838,12 +848,13 @@ def classify_schema_errors(errors: Sequence[str]) -> str:
     return "schema_validation_failed"
 
 
-def build_case_report(
-    case_bundles: Sequence[Mapping[str, Any]],
-    case_summaries: Sequence[Mapping[str, Any]],
-    audit_records: Sequence[Mapping[str, Any]],
-    accepted_rule_ids: Sequence[str],
-) -> str:
+def build_case_report(
+    case_bundles: Sequence[Mapping[str, Any]],
+    case_summaries: Sequence[Mapping[str, Any]],
+    audit_records: Sequence[Mapping[str, Any]],
+    accepted_rule_ids: Sequence[str],
+    run_summary: Mapping[str, int],
+) -> str:
     global_rejections = [
         record for record in audit_records if record.get("case_id") is None
     ]
@@ -867,11 +878,21 @@ def build_case_report(
         "# AI-Assisted Detection Demo Report",
         "",
         "This report is analyst-facing draft output from a constrained case summarization pipeline.",
-        "Detections and grouping are deterministic. The LLM is limited to structured summarization only.",
-        "Human verification is required. No automated response actions or final incident verdicts are produced.",
-        "",
-        "## Run Integrity",
-        "",
+        "Detections and grouping are deterministic. The LLM is limited to structured summarization only.",
+        "Human verification is required. No automated response actions or final incident verdicts are produced.",
+        "",
+        "## Run Summary",
+        "",
+        f"- raw_events: {run_summary['raw_events']}",
+        f"- normalized_events: {run_summary['normalized_events']}",
+        f"- rule_hits: {run_summary['rule_hits']}",
+        f"- cases: {run_summary['cases']}",
+        f"- accepted_summaries: {run_summary['accepted_summaries']}",
+        f"- rejected_summaries: {run_summary['rejected_summaries']}",
+        f"- audit_records: {run_summary['audit_records']}",
+        "",
+        "## Run Integrity",
+        "",
         f"- accepted_rules: {', '.join(accepted_rule_ids) if accepted_rule_ids else 'none'}",
         f"- rejected_rules: {', '.join(rejected_rule_ids) if rejected_rule_ids else 'none'}",
         f"- coverage_degraded: {coverage_degraded}",
@@ -1007,27 +1028,27 @@ def bounded_excerpt(raw_response: str | None) -> str | None:
     return compact[:RAW_RESPONSE_EXCERPT_LIMIT]
 
 
-def write_json(records: Any, path: Path) -> Path:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    with path.open("w", encoding="utf-8") as handle:
-        json.dump(serialize_record(records), handle, indent=2)
+def write_json(records: Any, path: Path) -> Path:
+    path = ensure_output_file_path(path)
+    with path.open("w", encoding="utf-8") as handle:
+        json.dump(serialize_record(records), handle, indent=2)
         handle.write("\n")
     return path
 
 
-def write_jsonl(records: Sequence[Mapping[str, Any]], path: Path) -> Path:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    with path.open("w", encoding="utf-8") as handle:
-        for record in records:
+def write_jsonl(records: Sequence[Mapping[str, Any]], path: Path) -> Path:
+    path = ensure_output_file_path(path)
+    with path.open("w", encoding="utf-8") as handle:
+        for record in records:
             handle.write(json.dumps(serialize_record(record), sort_keys=True))
             handle.write("\n")
     return path
 
 
-def write_text(content: str, path: Path) -> Path:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(content, encoding="utf-8")
-    return path
+def write_text(content: str, path: Path) -> Path:
+    path = ensure_output_file_path(path)
+    path.write_text(content, encoding="utf-8", newline="\n")
+    return path
 
 
 def derive_pipeline_ts(raw_events: Sequence[Mapping[str, Any]]) -> str:
diff --git a/src/telemetry_window_demo/config_change_investigation_demo/pipeline.py b/src/telemetry_window_demo/config_change_investigation_demo/pipeline.py
index 2fe4c57..45e6ea1 100644
--- a/src/telemetry_window_demo/config_change_investigation_demo/pipeline.py
+++ b/src/telemetry_window_demo/config_change_investigation_demo/pipeline.py
@@ -8,6 +8,7 @@
 
 import yaml
 
+from ..io import ensure_output_directory, ensure_output_file_path
 from ..time_utils import parse_utc_timestamp
 
 SEVERITY_ORDER = {"low": 1, "medium": 2, "high": 3, "critical": 4}
@@ -59,7 +60,7 @@ def run_demo(
         artifacts_dir
         or resolve_demo_path(demo_root, str(config["artifacts_dir"]))
     ).resolve()
-    artifacts_dir.mkdir(parents=True, exist_ok=True)
+    ensure_output_directory(artifacts_dir)
     correlation_minutes = int(config["correlation_minutes"])
 
     config_changes = normalize_config_changes(
@@ -536,7 +537,7 @@ def format_timestamp(value: Any) -> str:
 
 
 def write_json(payload: Any, path: Path) -> Path:
-    path.parent.mkdir(parents=True, exist_ok=True)
+    path = ensure_output_file_path(path)
     path.write_text(
         json.dumps(serialize_record(payload), indent=2) + "\n",
         encoding="utf-8",
@@ -545,8 +546,8 @@ def write_json(payload: Any, path: Path) -> Path:
 
 
 def write_text(content: str, path: Path) -> Path:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(content, encoding="utf-8")
+    path = ensure_output_file_path(path)
+    path.write_text(content, encoding="utf-8", newline="\n")
     return path
 
 
diff --git a/src/telemetry_window_demo/rule_evaluation_and_dedup_demo/pipeline.py b/src/telemetry_window_demo/rule_evaluation_and_dedup_demo/pipeline.py
index 27a9345..86cb121 100644
--- a/src/telemetry_window_demo/rule_evaluation_and_dedup_demo/pipeline.py
+++ b/src/telemetry_window_demo/rule_evaluation_and_dedup_demo/pipeline.py
@@ -9,6 +9,7 @@
 
 import yaml
 
+from ..io import ensure_output_directory, ensure_output_file_path
 from ..time_utils import parse_utc_timestamp
 
 SCOPE_FIELDS = ("entity", "source", "target", "host")
@@ -42,7 +43,7 @@ def run_demo(
         artifacts_dir
         or resolve_demo_path(demo_root, str(config.get("artifacts_dir", "artifacts")))
     ).resolve()
-    artifacts_dir.mkdir(parents=True, exist_ok=True)
+    ensure_output_directory(artifacts_dir)
 
     raw_hits = load_json(input_path)
     normalized_hits = normalize_rule_hits(raw_hits)
@@ -575,7 +576,7 @@ def rule_hit_sort_key(rule_hit: Mapping[str, Any]) -> tuple[str, str, str, str]:
 
 
 def write_json(payload: Any, path: Path) -> Path:
-    path.parent.mkdir(parents=True, exist_ok=True)
+    path = ensure_output_file_path(path)
     path.write_text(
         json.dumps(serialize_record(payload), indent=2) + "\n",
         encoding="utf-8",
@@ -584,8 +585,8 @@ def write_json(payload: Any, path: Path) -> Path:
 
 
 def write_text(content: str, path: Path) -> Path:
-    path.parent.mkdir(parents=True, exist_ok=True)
-    path.write_text(content, encoding="utf-8")
+    path = ensure_output_file_path(path)
+    path.write_text(content, encoding="utf-8", newline="\n")
     return path
 
 
diff --git a/src/telemetry_window_demo/windowing.py b/src/telemetry_window_demo/windowing.py
index 100df8f..761a97d 100644
--- a/src/telemetry_window_demo/windowing.py
+++ b/src/telemetry_window_demo/windowing.py
@@ -25,6 +25,9 @@ def build_windows(
         return []
 
     timestamps = pd.DatetimeIndex(events[timestamp_col])
+    if not timestamps.is_monotonic_increasing:
+        raise ValueError("Events must be sorted by timestamp before building windows.")
+
     start = timestamps.min().floor(f"{step_size_seconds}s")
     last_start = timestamps.max().floor(f"{step_size_seconds}s")
     window_delta = pd.Timedelta(seconds=window_size_seconds)
diff --git a/tests/test_ai_assisted_detection_demo.py b/tests/test_ai_assisted_detection_demo.py
index ecbf995..aa34454 100644
--- a/tests/test_ai_assisted_detection_demo.py
+++ b/tests/test_ai_assisted_detection_demo.py
@@ -240,7 +240,7 @@ def test_parse_and_validate_rejects_forbidden_language_in_uncertainty_notes() ->
     assert any("uncertainty_notes" in error for error in exc_info.value.errors)
 
 
-def test_prompt_injection_like_event_stays_in_untrusted_evidence() -> None:
+def test_prompt_injection_like_event_stays_in_untrusted_evidence() -> None:
     _, output_schema, _, _, _, case_bundles = _demo_inputs()
     web_case = next(
         case_bundle
@@ -254,11 +254,35 @@ def test_prompt_injection_like_event_stays_in_untrusted_evidence() -> None:
 
     assert "ignore all prior instructions" in evidence_text
     assert "ignore all prior instructions" not in system_text
-    assert envelope["evidence_payload"]["telemetry_classification"] == "untrusted_data"
-    assert any("untrusted evidence only" in item.lower() for item in web_case["evidence_highlights"])
-
-
-def test_malformed_attack_metadata_is_rejected_and_recorded(tmp_path) -> None:
+    assert envelope["evidence_payload"]["telemetry_classification"] == "untrusted_data"
+    assert any("untrusted evidence only" in item.lower() for item in web_case["evidence_highlights"])
+
+
+def test_default_case_report_includes_reviewer_run_summary(tmp_path) -> None:
+    result = run_demo(demo_root=default_demo_root(), artifacts_dir=tmp_path / "artifacts")
+
+    assert result["raw_event_count"] == 15
+    assert result["normalized_event_count"] == 15
+    assert result["rule_hit_count"] == 5
+    assert result["case_count"] == 3
+    assert result["summary_count"] == 3
+    assert result["rejected_summary_count"] == 0
+    assert result["audit_record_count"] == 3
+
+    report_text = (tmp_path / "artifacts" / "case_report.md").read_text(
+        encoding="utf-8"
+    )
+    assert "## Run Summary" in report_text
+    assert "- raw_events: 15" in report_text
+    assert "- normalized_events: 15" in report_text
+    assert "- rule_hits: 5" in report_text
+    assert "- cases: 3" in report_text
+    assert "- accepted_summaries: 3" in report_text
+    assert "- rejected_summaries: 0" in report_text
+    assert "- audit_records: 3" in report_text
+
+
+def test_malformed_attack_metadata_is_rejected_and_recorded(tmp_path) -> None:
     demo_root = _copy_demo_root(tmp_path)
     rules_path = demo_root / "config" / "rules.yaml"
     rules_config = load_yaml(rules_path)
@@ -356,7 +380,7 @@ def test_audit_traces_capture_accepted_and_rejected_paths(tmp_path) -> None:
     assert "Rejection reason: missing_required_fields" in report_text
 
 
-def test_case_id_mismatch_is_rejected_and_not_counted_as_accepted(tmp_path) -> None:
+def test_case_id_mismatch_is_rejected_and_not_counted_as_accepted(tmp_path) -> None:
     demo_root, _, _, _, _, _ = _demo_inputs()
     llm = ScriptedLlm(
         [
@@ -391,6 +415,14 @@ def test_case_id_mismatch_is_rejected_and_not_counted_as_accepted(tmp_path) -> N
     assert mismatch_record["raw_response_excerpt"] is not None
 
     report_text = (tmp_path / "artifacts" / "case_report.md").read_text(encoding="utf-8")
-    assert "## CASE-001" in report_text
-    assert "Summary status: rejected" in report_text
-    assert "Rejection reason: case_id_mismatch" in report_text
+    assert "## CASE-001" in report_text
+    assert "Summary status: rejected" in report_text
+    assert "Rejection reason: case_id_mismatch" in report_text
+
+
+def test_run_demo_rejects_file_artifacts_dir(tmp_path) -> None:
+    artifacts_dir = tmp_path / "artifacts"
+    artifacts_dir.write_text("not a directory\n", encoding="utf-8")
+
+    with pytest.raises(ValueError, match="Output directory path is not a directory"):
+        run_demo(demo_root=default_demo_root(), artifacts_dir=artifacts_dir)
diff --git a/tests/test_cli_subprocess.py b/tests/test_cli_subprocess.py
new file mode 100644
index 0000000..888f683
--- /dev/null
+++ b/tests/test_cli_subprocess.py
@@ -0,0 +1,111 @@
+from __future__ import annotations
+
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+import yaml
+
+from telemetry_window_demo.io import load_config
+
+
+def _cli_env(repo_root: Path) -> dict[str, str]:
+    env = os.environ.copy()
+    src_path = str(repo_root / "src")
+    existing_pythonpath = env.get("PYTHONPATH")
+    env["PYTHONPATH"] = (
+        src_path
+        if not existing_pythonpath
+        else os.pathsep.join((src_path, existing_pythonpath))
+    )
+    return env
+
+
+def test_readme_summarize_command_runs_as_module() -> None:
+    repo_root = Path(__file__).resolve().parents[1]
+
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "telemetry_window_demo.cli",
+            "summarize",
+            "--input",
+            "data/raw/sample_events.jsonl",
+        ],
+        cwd=repo_root,
+        env=_cli_env(repo_root),
+        text=True,
+        capture_output=True,
+        timeout=30,
+    )
+
+    assert result.returncode == 0, result.stderr
+    assert "events: 41" in result.stdout
+    assert "overall_error_rate: 0.61" in result.stdout
+
+
+def test_readme_default_run_command_writes_expected_artifacts(tmp_path) -> None:
+    repo_root = Path(__file__).resolve().parents[1]
+    config = load_config(repo_root / "configs" / "default.yaml")
+    output_dir = tmp_path / "processed"
+    config["input_path"] = str((repo_root / "data" / "raw" / "sample_events.jsonl").resolve())
+    config["output_dir"] = str(output_dir.resolve())
+    config_path = tmp_path / "default.yaml"
+    config_path.write_text(yaml.safe_dump(config, sort_keys=False), encoding="utf-8")
+
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "telemetry_window_demo.cli",
+            "run",
+            "--config",
+            str(config_path),
+        ],
+        cwd=repo_root,
+        env=_cli_env(repo_root),
+        text=True,
+        capture_output=True,
+        timeout=30,
+    )
+
+    assert result.returncode == 0, result.stderr
+    assert "[OK] Loaded 41 events" in result.stdout
+    assert "[OK] Triggered 12 alerts" in result.stdout
+    assert (output_dir / "features.csv").is_file()
+    assert (output_dir / "alerts.csv").is_file()
+    assert (output_dir / "summary.json").is_file()
+    assert (output_dir / "event_count_timeline.png").is_file()
+
+
+def test_plot_command_runs_as_module(tmp_path) -> None:
+    repo_root = Path(__file__).resolve().parents[1]
+    output_dir = tmp_path / "plots"
+
+    result = subprocess.run(
+        [
+            sys.executable,
+            "-m",
+            "telemetry_window_demo.cli",
+            "plot",
+            "--features",
+            "data/processed/features.csv",
+            "--alerts",
+            "data/processed/alerts.csv",
+            "--output-dir",
+            str(output_dir),
+        ],
+        cwd=repo_root,
+        env=_cli_env(repo_root),
+        text=True,
+        capture_output=True,
+        timeout=30,
+    )
+
+    assert result.returncode == 0, result.stderr
+    assert "[OK] Saved plots to" in result.stdout
+    assert (output_dir / "event_count_timeline.png").is_file()
+    assert (output_dir / "error_rate_timeline.png").is_file()
+    assert (output_dir / "alerts_timeline.png").is_file()
diff --git a/tests/test_config_change_investigation_demo.py b/tests/test_config_change_investigation_demo.py
index c097341..a2baf0c 100644
--- a/tests/test_config_change_investigation_demo.py
+++ b/tests/test_config_change_investigation_demo.py
@@ -210,6 +210,14 @@ def test_run_demo_reports_config_errors_before_loading_inputs(tmp_path) -> None:
         run_demo(demo_root=demo_root, artifacts_dir=tmp_path / "artifacts")
 
 
+def test_run_demo_rejects_file_artifacts_dir(tmp_path) -> None:
+    artifacts_dir = tmp_path / "artifacts"
+    artifacts_dir.write_text("not a directory\n", encoding="utf-8")
+
+    with pytest.raises(ValueError, match="Output directory path is not a directory"):
+        run_demo(demo_root=default_demo_root(), artifacts_dir=artifacts_dir)
+
+
 def test_run_demo_is_deterministic_and_matches_committed_artifacts(tmp_path) -> None:
     demo_root, _, _, _, _ = _load_demo_inputs()
     first_dir = tmp_path / "run-one"
diff --git a/tests/test_rule_evaluation_and_dedup_demo.py b/tests/test_rule_evaluation_and_dedup_demo.py
index 681bec8..15b318c 100644
--- a/tests/test_rule_evaluation_and_dedup_demo.py
+++ b/tests/test_rule_evaluation_and_dedup_demo.py
@@ -3,6 +3,8 @@
 import json
 from pathlib import Path
 
+import pytest
+
 from telemetry_window_demo.rule_evaluation_and_dedup_demo import default_demo_root, run_demo
 from telemetry_window_demo.rule_evaluation_and_dedup_demo.pipeline import (
     deduplicate_rule_hits,
@@ -120,3 +122,11 @@ def test_run_demo_is_deterministic_and_matches_committed_artifacts(tmp_path) ->
     expected_report = (demo_root / "artifacts" / "dedup_report.md").read_text(encoding="utf-8")
     assert (first_dir / "dedup_report.md").read_text(encoding="utf-8") == expected_report
     assert (second_dir / "dedup_report.md").read_text(encoding="utf-8") == expected_report
+
+
+def test_run_demo_rejects_file_artifacts_dir(tmp_path) -> None:
+    artifacts_dir = tmp_path / "artifacts"
+    artifacts_dir.write_text("not a directory\n", encoding="utf-8")
+
+    with pytest.raises(ValueError, match="Output directory path is not a directory"):
+        run_demo(demo_root=default_demo_root(), artifacts_dir=artifacts_dir)
diff --git a/tests/test_windowing.py b/tests/test_windowing.py
index a50a1b5..47181c5 100644
--- a/tests/test_windowing.py
+++ b/tests/test_windowing.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import pandas as pd
+import pytest
 
 from telemetry_window_demo.preprocess import normalize_events
 from telemetry_window_demo.windowing import build_windows
@@ -90,3 +91,32 @@ def test_build_windows_handles_microsecond_backed_timestamps() -> None:
     assert windows[0].end_index == 3
     assert windows[1].start_index == 1
     assert windows[1].end_index == 3
+
+
+def test_build_windows_rejects_unsorted_timestamps() -> None:
+    events = pd.DataFrame(
+        [
+            {
+                "timestamp": pd.Timestamp("2026-03-10T10:00:20Z"),
+                "event_type": "login_fail",
+                "source": "user_b",
+                "target": "auth",
+                "status": "fail",
+            },
+            {
+                "timestamp": pd.Timestamp("2026-03-10T10:00:00Z"),
+                "event_type": "login_success",
+                "source": "user_a",
+                "target": "auth",
+                "status": "ok",
+            },
+        ]
+    )
+
+    with pytest.raises(ValueError, match="sorted by timestamp"):
+        build_windows(
+            events,
+            timestamp_col="timestamp",
+            window_size_seconds=60,
+            step_size_seconds=10,
+        )