From 35d7d016590b85cf4f236b3a321a254e1c950eb3 Mon Sep 17 00:00:00 2001 From: stacknil Date: Thu, 21 May 2026 15:38:17 +0800 Subject: [PATCH 1/2] Add reviewer brief --- docs/reviewer-brief.md | 66 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 docs/reviewer-brief.md diff --git a/docs/reviewer-brief.md b/docs/reviewer-brief.md new file mode 100644 index 0000000..649906a --- /dev/null +++ b/docs/reviewer-brief.md @@ -0,0 +1,66 @@ +# Reviewer brief + +## Problem + +Telemetry and detection projects often look impressive in screenshots but are hard to review end to end. Reviewers need a narrow, reproducible path from raw events to outputs without a production stack or opaque AI behavior. + +## What it does + +`telemetry-lab` is a local, file-based portfolio repo with four demos: + +- `telemetry-window-demo` for sliding-window features and rule-based alerts +- `ai-assisted-detection-demo` for deterministic case grouping plus bounded JSON-only LLM drafting +- `rule-evaluation-and-dedup-demo` for cooldown and suppression reasoning +- `config-change-investigation-demo` for risky-change evidence correlation + +## Quick run + +```bash +python -m pip install -e ".[dev]" +python -m telemetry_window_demo.cli run --config configs/default.yaml +python -m telemetry_window_demo.cli run-rule-dedup-demo +python -m telemetry_window_demo.cli run-config-change-demo +python -m telemetry_window_demo.cli run-ai-demo +``` + +## Sample output + +The default `run --config configs/default.yaml` path regenerates: + +- `data/processed/features.csv` +- `data/processed/alerts.csv` +- `data/processed/summary.json` +- three PNG timelines under `data/processed/` + +The current committed default sample reports: + +- `41` normalized events +- `24` windows +- `12` alerts after a `60` second cooldown + +The other demos emit reviewer-facing artifacts such as `dedup_report.md`, `investigation_report.md`, and `case_report.md`. + +## What this proves + +- telemetry normalization and windowed feature design +- alert logic that stays reviewable instead of disappearing into scoring +- bounded, explicitly non-autonomous AI use +- reviewer-friendly artifact generation across multiple demo shapes + +## Safety / boundaries + +- local sample-data workflows only +- no real-time ingestion or autonomous response +- no final incident verdicts from the AI-assisted demo +- public review focus, not production deployment claims + +## Limitations + +- no alert routing, dashboarding, or case management +- sample-data driven, not connected to live systems +- no streaming state management +- intentionally small-scope demos rather than a unified monitoring platform + +## Next milestone + +Add another compact investigation path that strengthens the bridge from telemetry features to analyst-facing monitoring and detection workflows. From 086f5bf47552525fa0dc3030b1bca80b60e76d43 Mon Sep 17 00:00:00 2001 From: stacknil Date: Thu, 21 May 2026 22:47:28 +0800 Subject: [PATCH 2/2] docs(review): strengthen demo coherence --- .gitignore | 1 + CONTRIBUTING.md | 2 +- README.md | 44 +++-- demos/ai-assisted-detection-demo/README.md | 59 ++++-- .../artifacts/case_report.md | 170 +++++++++--------- .../README.md | 11 ++ .../rule-evaluation-and-dedup-demo/README.md | 10 ++ docs/design-notes.md | 2 +- docs/reviewer-brief.md | 8 + docs/reviewer-path.md | 39 ++++ .../ai_assisted_detection_demo/pipeline.py | 109 ++++++----- .../pipeline.py | 9 +- .../pipeline.py | 9 +- src/telemetry_window_demo/windowing.py | 3 + tests/test_ai_assisted_detection_demo.py | 52 ++++-- tests/test_cli_subprocess.py | 111 ++++++++++++ .../test_config_change_investigation_demo.py | 8 + tests/test_rule_evaluation_and_dedup_demo.py | 10 ++ tests/test_windowing.py | 30 ++++ 19 files changed, 509 insertions(+), 178 deletions(-) create mode 100644 docs/reviewer-path.md create mode 100644 tests/test_cli_subprocess.py diff --git a/.gitignore b/.gitignore index b3b3f0a..e45129c 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ __pycache__/ *.py[cod] *.egg-info/ .pytest_cache/ +.pytest-artifacts*/ .coverage .venv/ venv/ diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 554540b..7b90aa6 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -17,7 +17,7 @@ Thanks for the interest. This is a solo-maintainer portfolio repository, so the - run: ```bash - python -m pip install -e . + python -m pip install -e ".[dev]" pytest ``` diff --git a/README.md b/README.md index e1cdfea..8261140 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![CI](https://github.com/stacknil/telemetry-lab/actions/workflows/ci.yml/badge.svg)](https://github.com/stacknil/telemetry-lab/actions/workflows/ci.yml) -Small portfolio prototypes for telemetry analytics, monitoring, and detection-oriented signal processing. +A local, file-based detection workflow lab for reviewer-verifiable telemetry and detection demos. Latest milestone: [v0.6.0 — fourth demo and config-change investigation](https://github.com/stacknil/telemetry-lab/releases/latest). @@ -22,7 +22,7 @@ Latest milestone: [v0.6.0 — fourth demo and config-change investigation](https ## What This Repo Is -`telemetry-lab` is a small portfolio repository for telemetry analytics and constrained detection-oriented workflows. It is organized as four local, file-based demos that are reproducible from committed sample data and intentionally scoped for public review rather than production use. +`telemetry-lab` is a small portfolio repository for constrained detection workflows. It is not a SIEM, dashboard, or monitoring platform; it is organized as four local, file-based demos that are reproducible from committed sample data and intentionally scoped for public review rather than production use. ### telemetry-window-demo @@ -47,6 +47,9 @@ python -m pip install -e . python -m telemetry_window_demo.cli run --config configs/default.yaml ``` +Use the same Python interpreter for install, tests, and demo commands. On machines with multiple Python installs, replace `python` with the intended interpreter path. +To run the test suite in a fresh environment, install the dev extra with `python -m pip install -e ".[dev]"`. + Other demo entrypoints: - `python -m telemetry_window_demo.cli run-ai-demo` @@ -56,9 +59,10 @@ Other demo entrypoints: Useful inspection commands: - `python -m telemetry_window_demo.cli summarize --input data/raw/sample_events.jsonl` -- `python -m telemetry_window_demo.cli summarize --input events.csv --timestamp-col event_time` -That command reads `data/raw/sample_events.jsonl` and regenerates: +For CSV inputs, pass a `.csv` file to `--input`; use `--timestamp-col` when the timestamp column is not named `timestamp`. + +The `run --config configs/default.yaml` command reads `data/raw/sample_events.jsonl` and regenerates: - `data/processed/features.csv` - `data/processed/alerts.csv` @@ -73,17 +77,26 @@ With the bundled default sample, the current repo state produces: - `24` windows - `12` alerts after a `60` second cooldown -Why it is worth a quick look: - -- it shows a full telemetry path from raw events to operator-facing outputs -- the sample inputs and outputs are reproducible in-repo -- a second bundled scenario gives a slightly richer walkthrough without changing the basic CLI flow - -![Default alert timeline](data/processed/alerts_timeline.png) - -## Demo Variants - -Default sample: +Why it is worth a quick look: + +- it shows a full telemetry path from raw events to operator-facing outputs +- the sample inputs and outputs are reproducible in-repo +- a second bundled scenario gives a slightly richer walkthrough without changing the basic CLI flow + +![Default alert timeline](data/processed/alerts_timeline.png) + +## Reviewer Path + +For a quick coherence pass across the demos: + +1. Run `python -m telemetry_window_demo.cli run --config configs/default.yaml` and confirm `data/processed/summary.json` reports `41` events, `24` windows, and `12` alerts. +2. Run `python -m telemetry_window_demo.cli run-rule-dedup-demo` and confirm `demos/rule-evaluation-and-dedup-demo/artifacts/dedup_report.md` shows `10` raw hits reduced to `6` retained alerts with `4` suppressions. +3. Run `python -m telemetry_window_demo.cli run-config-change-demo` and confirm `demos/config-change-investigation-demo/artifacts/investigation_report.md` shows `4` normalized changes, `3` risky changes, and `3` investigations. +4. Run `python -m telemetry_window_demo.cli run-ai-demo` and confirm `demos/ai-assisted-detection-demo/artifacts/case_report.md` shows `3` deterministic cases with human verification and no final incident verdict. + +## Demo Variants + +Default sample: - config: [`configs/default.yaml`](configs/default.yaml) - input: `data/raw/sample_events.jsonl` @@ -129,6 +142,7 @@ Cooldown behavior: - [`demos/rule-evaluation-and-dedup-demo/README.md`](demos/rule-evaluation-and-dedup-demo/README.md) explains the third demo and links its committed before/after dedup artifacts - [`demos/config-change-investigation-demo/README.md`](demos/config-change-investigation-demo/README.md) explains the config-change investigation demo and its committed artifacts +- [`docs/reviewer-path.md`](docs/reviewer-path.md) maps common review questions to the right demo and artifacts - [`docs/sample-output.md`](docs/sample-output.md) summarizes the committed sample artifacts - [`docs/roadmap.md`](docs/roadmap.md) sketches the next demo directions - [`data/processed/summary.json`](data/processed/summary.json) captures the default run in machine-readable form diff --git a/demos/ai-assisted-detection-demo/README.md b/demos/ai-assisted-detection-demo/README.md index 24b9c6e..e8b7562 100644 --- a/demos/ai-assisted-detection-demo/README.md +++ b/demos/ai-assisted-detection-demo/README.md @@ -65,23 +65,32 @@ Generated artifacts are written to `demos/ai-assisted-detection-demo/artifacts/` - deterministic rules: `config/rules.yaml` - structured output schema: `config/llm_case_output_schema.json` -## Expected artifacts - -- `artifacts/rule_hits.json` -- `artifacts/case_bundles.json` -- `artifacts/case_summaries.json` -- `artifacts/case_report.md` -- `artifacts/audit_traces.jsonl` - -The bundled sample data is designed to produce at least three generated cases. - -## Artifact semantics +## Expected artifacts + +- `artifacts/rule_hits.json` +- `artifacts/case_bundles.json` +- `artifacts/case_summaries.json` +- `artifacts/case_report.md` +- `artifacts/audit_traces.jsonl` + +## Expected run summary + +The bundled sample run should report: + +- `15` raw events normalized into `15` internal events +- `5` deterministic rule hits +- `3` grouped cases +- `3` accepted JSON summaries +- `0` rejected summaries in the default accepted path +- `3` audit records + +## Artifact semantics - `rule_hits.json`: deterministic rule hits with rule metadata, ATT&CK mapping, entities, and evidence highlights - `case_bundles.json`: grouped cases with severity, rule hits, ATT&CK mappings, raw evidence, and untrusted-data marking - `case_summaries.json`: only accepted JSON summaries that passed schema and semantic validation -- `case_report.md`: analyst-facing report that shows accepted summaries and explicitly notes rejected case summaries -- `case_report.md`: includes a top-level run integrity section that surfaces rule/config degradation +- `case_report.md`: analyst-facing report with run counts, accepted summaries, and explicit notes for rejected case summaries +- `case_report.md`: includes a top-level run integrity section that surfaces rule/config degradation - `audit_traces.jsonl`: stable per-record audit log for accepted and rejected paths, using `schema_version = ai-assisted-detection-audit/v1` and including `ts`, `case_id`, `validation_status`, `rejection_reason`, `rule_ids`, `prompt_input_digest`, `evidence_digest`, and bounded response excerpts ## Rejection behavior @@ -103,15 +112,27 @@ Use the default sample run artifacts in `artifacts/case_summaries.json`, `artifa Verify that `CASE-001` appears in all three places, that the `case_id` matches exactly, that `human_verification` is `required`, and that the audit record shows `validation_status = accepted` with `schema_version = ai-assisted-detection-audit/v1`. -### Rejected summary path - -Run `pytest tests/test_ai_assisted_detection_demo.py -k "audit_traces_capture_accepted_and_rejected_paths or case_id_mismatch"` and inspect the `case_report.md`, `case_summaries.json`, and `audit_traces.jsonl` artifacts written by the test. +### Rejected summary path + +Run: + +```bash +pytest tests/test_ai_assisted_detection_demo.py -k "audit_traces_capture_accepted_and_rejected_paths or case_id_mismatch" --basetemp .pytest-artifacts-ai-demo-rejections +``` + +Then inspect the `case_report.md`, `case_summaries.json`, and `audit_traces.jsonl` files under `.pytest-artifacts-ai-demo-rejections/test_*/artifacts/`. Verify that the rejected case is absent from `case_summaries.json`, appears in `case_report.md` as `Summary status: rejected`, and has an audit record with `validation_status = rejected` plus a concrete `rejection_reason` such as `missing_required_fields`, `semantic_validation_failed`, or `case_id_mismatch`. -### Degraded coverage path - -Run `pytest tests/test_ai_assisted_detection_demo.py -k malformed_attack_metadata_is_rejected_and_recorded` and inspect the generated `case_report.md` and `audit_traces.jsonl`. +### Degraded coverage path + +Run: + +```bash +pytest tests/test_ai_assisted_detection_demo.py -k malformed_attack_metadata_is_rejected_and_recorded --basetemp .pytest-artifacts-ai-demo-degraded +``` + +Then inspect the generated `case_report.md` and `audit_traces.jsonl` files under `.pytest-artifacts-ai-demo-degraded/test_*/artifacts/`. Verify that `case_report.md` exposes `## Run Integrity`, `coverage_degraded: yes`, and the rejected rule id, and that `audit_traces.jsonl` contains a global rejection record with `case_id = null` and `rejection_reason = rule_metadata_validation_failed`. diff --git a/demos/ai-assisted-detection-demo/artifacts/case_report.md b/demos/ai-assisted-detection-demo/artifacts/case_report.md index 3338ff7..845917d 100644 --- a/demos/ai-assisted-detection-demo/artifacts/case_report.md +++ b/demos/ai-assisted-detection-demo/artifacts/case_report.md @@ -1,80 +1,90 @@ -# AI-Assisted Detection Demo Report - -This report is analyst-facing draft output from a constrained case summarization pipeline. -Detections and grouping are deterministic. The LLM is limited to structured summarization only. -Human verification is required. No automated response actions or final incident verdicts are produced. - -## Run Integrity - -- accepted_rules: AUTH-001, AUTH-002, PROC-001, WEB-001 -- rejected_rules: none -- coverage_degraded: no -- rejection_reasons: none - -## CASE-001 - -- Severity: high -- First seen: 2026-03-27T09:01:55Z -- Last seen: 2026-03-27T09:02:20Z -- Rule hits: repeated_failed_logins, successful_login_after_failures -- ATT&CK: T1110, T1078 - -Summary: CASE-001 contains 2 deterministic rule hits covering repeated_failed_logins, successful_login_after_failures for principal ops_admin; src_ip 198.51.100.24; host vpn-gw-01 during 2026-03-27T09:01:55Z to 2026-03-27T09:02:20Z. The case warrants analyst review but does not imply a final incident decision. - -Likely causes: -- Repeated password guessing or credential stuffing against the targeted account. -- A valid credential may have been used after several failed login attempts. - -Uncertainty notes: -- Telemetry is limited to the bundled sample evidence and does not confirm operator intent. -- The case summary is advisory only and requires human review before any incident classification. - -Suggested next steps: -- Review the raw evidence and confirm whether the activity aligns with an approved administrative task. -- Check authentication context for MFA state, prior successful logins, and expected source locations. -- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict. - -## CASE-002 - -- Severity: medium -- First seen: 2026-03-27T09:11:10Z -- Last seen: 2026-03-27T09:11:10Z -- Rule hits: sensitive_path_scan -- ATT&CK: T1595 - -Summary: CASE-002 contains 1 deterministic rule hits covering sensitive_path_scan for src_ip 203.0.113.77; host portal-01 during 2026-03-27T09:11:10Z to 2026-03-27T09:11:10Z. The case warrants analyst review but does not imply a final incident decision. - -Likely causes: -- The source IP appears to be probing sensitive web paths on the exposed application. - -Uncertainty notes: -- Telemetry is limited to the bundled sample evidence and does not confirm operator intent. -- The case summary is advisory only and requires human review before any incident classification. -- Prompt-like text appeared in telemetry and was treated strictly as untrusted evidence. - -Suggested next steps: -- Review the raw evidence and confirm whether the activity aligns with an approved administrative task. -- Compare the web requests with reverse-proxy and WAF logs to determine whether the probing continued. -- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict. - -## CASE-003 - -- Severity: high -- First seen: 2026-03-27T09:20:00Z -- Last seen: 2026-03-27T09:20:20Z -- Rule hits: encoded_powershell_execution, encoded_powershell_execution -- ATT&CK: T1059.001 - -Summary: CASE-003 contains 2 deterministic rule hits covering encoded_powershell_execution for principal lab_user; host wkstn-07 during 2026-03-27T09:20:00Z to 2026-03-27T09:20:20Z. The case warrants analyst review but does not imply a final incident decision. - -Likely causes: -- Obfuscated PowerShell execution may reflect manual tradecraft or an unsafe script. - -Uncertainty notes: -- Telemetry is limited to the bundled sample evidence and does not confirm operator intent. -- The case summary is advisory only and requires human review before any incident classification. - -Suggested next steps: -- Review the raw evidence and confirm whether the activity aligns with an approved administrative task. -- Inspect the originating host timeline and validate whether the encoded PowerShell command matches known tooling. -- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict. +# AI-Assisted Detection Demo Report + +This report is analyst-facing draft output from a constrained case summarization pipeline. +Detections and grouping are deterministic. The LLM is limited to structured summarization only. +Human verification is required. No automated response actions or final incident verdicts are produced. + +## Run Summary + +- raw_events: 15 +- normalized_events: 15 +- rule_hits: 5 +- cases: 3 +- accepted_summaries: 3 +- rejected_summaries: 0 +- audit_records: 3 + +## Run Integrity + +- accepted_rules: AUTH-001, AUTH-002, PROC-001, WEB-001 +- rejected_rules: none +- coverage_degraded: no +- rejection_reasons: none + +## CASE-001 + +- Severity: high +- First seen: 2026-03-27T09:01:55Z +- Last seen: 2026-03-27T09:02:20Z +- Rule hits: repeated_failed_logins, successful_login_after_failures +- ATT&CK: T1110, T1078 + +Summary: CASE-001 contains 2 deterministic rule hits covering repeated_failed_logins, successful_login_after_failures for principal ops_admin; src_ip 198.51.100.24; host vpn-gw-01 during 2026-03-27T09:01:55Z to 2026-03-27T09:02:20Z. The case warrants analyst review but does not imply a final incident decision. + +Likely causes: +- Repeated password guessing or credential stuffing against the targeted account. +- A valid credential may have been used after several failed login attempts. + +Uncertainty notes: +- Telemetry is limited to the bundled sample evidence and does not confirm operator intent. +- The case summary is advisory only and requires human review before any incident classification. + +Suggested next steps: +- Review the raw evidence and confirm whether the activity aligns with an approved administrative task. +- Check authentication context for MFA state, prior successful logins, and expected source locations. +- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict. + +## CASE-002 + +- Severity: medium +- First seen: 2026-03-27T09:11:10Z +- Last seen: 2026-03-27T09:11:10Z +- Rule hits: sensitive_path_scan +- ATT&CK: T1595 + +Summary: CASE-002 contains 1 deterministic rule hits covering sensitive_path_scan for src_ip 203.0.113.77; host portal-01 during 2026-03-27T09:11:10Z to 2026-03-27T09:11:10Z. The case warrants analyst review but does not imply a final incident decision. + +Likely causes: +- The source IP appears to be probing sensitive web paths on the exposed application. + +Uncertainty notes: +- Telemetry is limited to the bundled sample evidence and does not confirm operator intent. +- The case summary is advisory only and requires human review before any incident classification. +- Prompt-like text appeared in telemetry and was treated strictly as untrusted evidence. + +Suggested next steps: +- Review the raw evidence and confirm whether the activity aligns with an approved administrative task. +- Compare the web requests with reverse-proxy and WAF logs to determine whether the probing continued. +- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict. + +## CASE-003 + +- Severity: high +- First seen: 2026-03-27T09:20:00Z +- Last seen: 2026-03-27T09:20:20Z +- Rule hits: encoded_powershell_execution, encoded_powershell_execution +- ATT&CK: T1059.001 + +Summary: CASE-003 contains 2 deterministic rule hits covering encoded_powershell_execution for principal lab_user; host wkstn-07 during 2026-03-27T09:20:00Z to 2026-03-27T09:20:20Z. The case warrants analyst review but does not imply a final incident decision. + +Likely causes: +- Obfuscated PowerShell execution may reflect manual tradecraft or an unsafe script. + +Uncertainty notes: +- Telemetry is limited to the bundled sample evidence and does not confirm operator intent. +- The case summary is advisory only and requires human review before any incident classification. + +Suggested next steps: +- Review the raw evidence and confirm whether the activity aligns with an approved administrative task. +- Inspect the originating host timeline and validate whether the encoded PowerShell command matches known tooling. +- Document the analyst conclusion separately after human verification; do not treat this summary as a final verdict. diff --git a/demos/config-change-investigation-demo/README.md b/demos/config-change-investigation-demo/README.md index ed2ba4f..0a86235 100644 --- a/demos/config-change-investigation-demo/README.md +++ b/demos/config-change-investigation-demo/README.md @@ -58,6 +58,17 @@ Evidence is attached only when: - `artifacts/investigation_summary.json` - `artifacts/investigation_report.md` +## Expected Run Summary + +The bundled sample run should report: + +- `4` normalized configuration changes +- `3` risky changes +- `3` investigations +- `15` minute correlation window +- `1` benign change that remains normalized but does not become an investigation +- `1` risky break-glass investigation with no nearby supporting evidence + ## Artifact Semantics - `change_events_normalized.json`: normalized config changes before any rule match is applied diff --git a/demos/rule-evaluation-and-dedup-demo/README.md b/demos/rule-evaluation-and-dedup-demo/README.md index ddbb90e..71fc80d 100644 --- a/demos/rule-evaluation-and-dedup-demo/README.md +++ b/demos/rule-evaluation-and-dedup-demo/README.md @@ -59,6 +59,16 @@ That means repeated hits for the same rule can still be kept separately when the - `artifacts/dedup_explanations.json` - `artifacts/dedup_report.md` +## Expected Run Summary + +The bundled sample run should report: + +- `10` raw rule hits +- `6` retained alerts +- `4` suppressed repeated hits +- `4` rule/scope groups +- `180` second cooldown + ## Artifact Semantics - `rule_hits_before_dedup.json`: normalized raw hits with resolved cooldown scope and cooldown key diff --git a/docs/design-notes.md b/docs/design-notes.md index d8d07f0..20bfd83 100644 --- a/docs/design-notes.md +++ b/docs/design-notes.md @@ -23,7 +23,7 @@ The implementation follows a narrow pipeline: - `pandas` is used for clarity and concise feature computation. - Rules stay threshold-based to emphasize detection semantics over model complexity. - Relative config paths are resolved from the repository root when the config lives under `configs/`. -- Outputs are CSV and PNG because they are easy to inspect, diff, and embed in README material. +- Outputs are CSV, JSON, and PNG: tables and summaries stay easy to diff, while PNG timelines are easy to inspect and embed in README material. - The notebook remains intentionally tiny so the packaged CLI pipeline stays the primary entrypoint. ## Non-goals diff --git a/docs/reviewer-brief.md b/docs/reviewer-brief.md index 649906a..42d3b9d 100644 --- a/docs/reviewer-brief.md +++ b/docs/reviewer-brief.md @@ -13,6 +13,14 @@ Telemetry and detection projects often look impressive in screenshots but are ha - `rule-evaluation-and-dedup-demo` for cooldown and suppression reasoning - `config-change-investigation-demo` for risky-change evidence correlation +## Reviewer Evidence + +- Reproducible command: `python -m telemetry_window_demo.cli run --config configs/default.yaml` +- Deterministic outputs: feature tables, alert tables, `summary.json`, PNG timelines, dedup reports, investigation reports, and bounded AI case reports. +- Tests / CI: pytest coverage for windowing, CLI behavior, demo pipelines, artifact validation, and deterministic guardrails; GitHub Actions CI is enabled. +- Release evidence: reviewer packs and release notes through the current `v0.6.0` milestone. +- Non-goals: production monitoring, real-time ingestion, alert routing, autonomous response, dashboards, or final incident verdicts. + ## Quick run ```bash diff --git a/docs/reviewer-path.md b/docs/reviewer-path.md new file mode 100644 index 0000000..a1bf3f3 --- /dev/null +++ b/docs/reviewer-path.md @@ -0,0 +1,39 @@ +# Reviewer Path + +`telemetry-lab` is a controlled detection workflow portfolio. It is not a SIEM, not a dashboard, and not an unfinished monitoring platform. + +The repo is intentionally local and file-based so reviewers can verify each workflow from committed sample inputs to generated artifacts without live infrastructure, alert routing, or autonomous response behavior. + +## Choose a demo by review question + +| Question | Demo | What to inspect | +| --- | --- | --- | +| How are raw events converted to alert features? | `telemetry-window-demo` | `data/processed/features.csv`, `data/processed/alerts.csv`, `data/processed/summary.json` | +| How is AI constrained? | `ai-assisted-detection-demo` | `demos/ai-assisted-detection-demo/artifacts/case_summaries.json`, `demos/ai-assisted-detection-demo/artifacts/audit_traces.jsonl`, guardrails in `demos/ai-assisted-detection-demo/README.md` | +| How are duplicate alerts reduced? | `rule-evaluation-and-dedup-demo` | `demos/rule-evaluation-and-dedup-demo/artifacts/rule_hits_before_dedup.json`, `demos/rule-evaluation-and-dedup-demo/artifacts/rule_hits_after_dedup.json`, `demos/rule-evaluation-and-dedup-demo/artifacts/dedup_explanations.json` | +| How are risky config changes investigated? | `config-change-investigation-demo` | `demos/config-change-investigation-demo/artifacts/investigation_hits.json`, `demos/config-change-investigation-demo/artifacts/investigation_report.md` | + +## Fast verification commands + +From the repository root: + +Use the same Python interpreter for install, tests, and demo commands. On machines with multiple Python installs, replace `python` with the intended interpreter path. + +```bash +python -m pip install -e ".[dev]" +python -m telemetry_window_demo.cli run --config configs/default.yaml +python -m telemetry_window_demo.cli run-ai-demo +python -m telemetry_window_demo.cli run-rule-dedup-demo +python -m telemetry_window_demo.cli run-config-change-demo +pytest +``` + +## Expected boundaries + +- No production monitoring claims +- No real-time ingestion or streaming state +- No alert routing, dashboard, or case-management service +- No autonomous response actions +- No final incident verdicts from the AI-assisted demo + +The reviewer value is the workflow evidence: deterministic inputs, visible intermediate artifacts, constrained summaries, and reports that make detection behavior inspectable. diff --git a/src/telemetry_window_demo/ai_assisted_detection_demo/pipeline.py b/src/telemetry_window_demo/ai_assisted_detection_demo/pipeline.py index 265a7eb..d5c4d90 100644 --- a/src/telemetry_window_demo/ai_assisted_detection_demo/pipeline.py +++ b/src/telemetry_window_demo/ai_assisted_detection_demo/pipeline.py @@ -9,9 +9,10 @@ from pathlib import Path from typing import Any -import yaml - -from ..time_utils import parse_utc_timestamp +import yaml + +from ..io import ensure_output_directory, ensure_output_file_path +from ..time_utils import parse_utc_timestamp from .llm import DemoStructuredCaseLlm SEVERITY_ORDER = {"low": 1, "medium": 2, "high": 3, "critical": 4} @@ -100,9 +101,9 @@ def run_demo( artifacts_dir: Path | None = None, llm: Any | None = None, ) -> dict[str, Any]: - demo_root = Path(demo_root or default_demo_root()).resolve() - artifacts_dir = Path(artifacts_dir or demo_root / "artifacts").resolve() - artifacts_dir.mkdir(parents=True, exist_ok=True) + demo_root = Path(demo_root or default_demo_root()).resolve() + artifacts_dir = Path(artifacts_dir or demo_root / "artifacts").resolve() + ensure_output_directory(artifacts_dir) raw_events = load_jsonl(demo_root / "data" / "raw" / "sample_security_events.jsonl") rules_config = load_yaml(demo_root / "config" / "rules.yaml") @@ -241,21 +242,30 @@ def run_demo( ) ) - paths = { - "rule_hits": write_json(rule_hits, artifacts_dir / "rule_hits.json"), - "case_bundles": write_json(case_bundles, artifacts_dir / "case_bundles.json"), - "case_summaries": write_json(case_summaries, artifacts_dir / "case_summaries.json"), - "case_report": write_text( - build_case_report( - case_bundles, - case_summaries, - audit_records, - accepted_rule_ids=accepted_rule_ids, - ), - artifacts_dir / "case_report.md", - ), - "audit_traces": write_jsonl(audit_records, artifacts_dir / "audit_traces.jsonl"), - } + paths = { + "rule_hits": write_json(rule_hits, artifacts_dir / "rule_hits.json"), + "case_bundles": write_json(case_bundles, artifacts_dir / "case_bundles.json"), + "case_summaries": write_json(case_summaries, artifacts_dir / "case_summaries.json"), + "case_report": write_text( + build_case_report( + case_bundles, + case_summaries, + audit_records, + accepted_rule_ids=accepted_rule_ids, + run_summary={ + "raw_events": len(raw_events), + "normalized_events": len(normalized_events), + "rule_hits": len(rule_hits), + "cases": len(case_bundles), + "accepted_summaries": len(case_summaries), + "rejected_summaries": rejected_summary_count, + "audit_records": len(audit_records), + }, + ), + artifacts_dir / "case_report.md", + ), + "audit_traces": write_jsonl(audit_records, artifacts_dir / "audit_traces.jsonl"), + } return { "demo_root": demo_root, @@ -838,12 +848,13 @@ def classify_schema_errors(errors: Sequence[str]) -> str: return "schema_validation_failed" -def build_case_report( - case_bundles: Sequence[Mapping[str, Any]], - case_summaries: Sequence[Mapping[str, Any]], - audit_records: Sequence[Mapping[str, Any]], - accepted_rule_ids: Sequence[str], -) -> str: +def build_case_report( + case_bundles: Sequence[Mapping[str, Any]], + case_summaries: Sequence[Mapping[str, Any]], + audit_records: Sequence[Mapping[str, Any]], + accepted_rule_ids: Sequence[str], + run_summary: Mapping[str, int], +) -> str: global_rejections = [ record for record in audit_records if record.get("case_id") is None ] @@ -867,11 +878,21 @@ def build_case_report( "# AI-Assisted Detection Demo Report", "", "This report is analyst-facing draft output from a constrained case summarization pipeline.", - "Detections and grouping are deterministic. The LLM is limited to structured summarization only.", - "Human verification is required. No automated response actions or final incident verdicts are produced.", - "", - "## Run Integrity", - "", + "Detections and grouping are deterministic. The LLM is limited to structured summarization only.", + "Human verification is required. No automated response actions or final incident verdicts are produced.", + "", + "## Run Summary", + "", + f"- raw_events: {run_summary['raw_events']}", + f"- normalized_events: {run_summary['normalized_events']}", + f"- rule_hits: {run_summary['rule_hits']}", + f"- cases: {run_summary['cases']}", + f"- accepted_summaries: {run_summary['accepted_summaries']}", + f"- rejected_summaries: {run_summary['rejected_summaries']}", + f"- audit_records: {run_summary['audit_records']}", + "", + "## Run Integrity", + "", f"- accepted_rules: {', '.join(accepted_rule_ids) if accepted_rule_ids else 'none'}", f"- rejected_rules: {', '.join(rejected_rule_ids) if rejected_rule_ids else 'none'}", f"- coverage_degraded: {coverage_degraded}", @@ -1007,27 +1028,27 @@ def bounded_excerpt(raw_response: str | None) -> str | None: return compact[:RAW_RESPONSE_EXCERPT_LIMIT] -def write_json(records: Any, path: Path) -> Path: - path.parent.mkdir(parents=True, exist_ok=True) - with path.open("w", encoding="utf-8") as handle: - json.dump(serialize_record(records), handle, indent=2) +def write_json(records: Any, path: Path) -> Path: + path = ensure_output_file_path(path) + with path.open("w", encoding="utf-8") as handle: + json.dump(serialize_record(records), handle, indent=2) handle.write("\n") return path -def write_jsonl(records: Sequence[Mapping[str, Any]], path: Path) -> Path: - path.parent.mkdir(parents=True, exist_ok=True) - with path.open("w", encoding="utf-8") as handle: - for record in records: +def write_jsonl(records: Sequence[Mapping[str, Any]], path: Path) -> Path: + path = ensure_output_file_path(path) + with path.open("w", encoding="utf-8") as handle: + for record in records: handle.write(json.dumps(serialize_record(record), sort_keys=True)) handle.write("\n") return path -def write_text(content: str, path: Path) -> Path: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content, encoding="utf-8") - return path +def write_text(content: str, path: Path) -> Path: + path = ensure_output_file_path(path) + path.write_text(content, encoding="utf-8", newline="\n") + return path def derive_pipeline_ts(raw_events: Sequence[Mapping[str, Any]]) -> str: diff --git a/src/telemetry_window_demo/config_change_investigation_demo/pipeline.py b/src/telemetry_window_demo/config_change_investigation_demo/pipeline.py index 2fe4c57..45e6ea1 100644 --- a/src/telemetry_window_demo/config_change_investigation_demo/pipeline.py +++ b/src/telemetry_window_demo/config_change_investigation_demo/pipeline.py @@ -8,6 +8,7 @@ import yaml +from ..io import ensure_output_directory, ensure_output_file_path from ..time_utils import parse_utc_timestamp SEVERITY_ORDER = {"low": 1, "medium": 2, "high": 3, "critical": 4} @@ -59,7 +60,7 @@ def run_demo( artifacts_dir or resolve_demo_path(demo_root, str(config["artifacts_dir"])) ).resolve() - artifacts_dir.mkdir(parents=True, exist_ok=True) + ensure_output_directory(artifacts_dir) correlation_minutes = int(config["correlation_minutes"]) config_changes = normalize_config_changes( @@ -536,7 +537,7 @@ def format_timestamp(value: Any) -> str: def write_json(payload: Any, path: Path) -> Path: - path.parent.mkdir(parents=True, exist_ok=True) + path = ensure_output_file_path(path) path.write_text( json.dumps(serialize_record(payload), indent=2) + "\n", encoding="utf-8", @@ -545,8 +546,8 @@ def write_json(payload: Any, path: Path) -> Path: def write_text(content: str, path: Path) -> Path: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content, encoding="utf-8") + path = ensure_output_file_path(path) + path.write_text(content, encoding="utf-8", newline="\n") return path diff --git a/src/telemetry_window_demo/rule_evaluation_and_dedup_demo/pipeline.py b/src/telemetry_window_demo/rule_evaluation_and_dedup_demo/pipeline.py index 27a9345..86cb121 100644 --- a/src/telemetry_window_demo/rule_evaluation_and_dedup_demo/pipeline.py +++ b/src/telemetry_window_demo/rule_evaluation_and_dedup_demo/pipeline.py @@ -9,6 +9,7 @@ import yaml +from ..io import ensure_output_directory, ensure_output_file_path from ..time_utils import parse_utc_timestamp SCOPE_FIELDS = ("entity", "source", "target", "host") @@ -42,7 +43,7 @@ def run_demo( artifacts_dir or resolve_demo_path(demo_root, str(config.get("artifacts_dir", "artifacts"))) ).resolve() - artifacts_dir.mkdir(parents=True, exist_ok=True) + ensure_output_directory(artifacts_dir) raw_hits = load_json(input_path) normalized_hits = normalize_rule_hits(raw_hits) @@ -575,7 +576,7 @@ def rule_hit_sort_key(rule_hit: Mapping[str, Any]) -> tuple[str, str, str, str]: def write_json(payload: Any, path: Path) -> Path: - path.parent.mkdir(parents=True, exist_ok=True) + path = ensure_output_file_path(path) path.write_text( json.dumps(serialize_record(payload), indent=2) + "\n", encoding="utf-8", @@ -584,8 +585,8 @@ def write_json(payload: Any, path: Path) -> Path: def write_text(content: str, path: Path) -> Path: - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(content, encoding="utf-8") + path = ensure_output_file_path(path) + path.write_text(content, encoding="utf-8", newline="\n") return path diff --git a/src/telemetry_window_demo/windowing.py b/src/telemetry_window_demo/windowing.py index 100df8f..761a97d 100644 --- a/src/telemetry_window_demo/windowing.py +++ b/src/telemetry_window_demo/windowing.py @@ -25,6 +25,9 @@ def build_windows( return [] timestamps = pd.DatetimeIndex(events[timestamp_col]) + if not timestamps.is_monotonic_increasing: + raise ValueError("Events must be sorted by timestamp before building windows.") + start = timestamps.min().floor(f"{step_size_seconds}s") last_start = timestamps.max().floor(f"{step_size_seconds}s") window_delta = pd.Timedelta(seconds=window_size_seconds) diff --git a/tests/test_ai_assisted_detection_demo.py b/tests/test_ai_assisted_detection_demo.py index ecbf995..aa34454 100644 --- a/tests/test_ai_assisted_detection_demo.py +++ b/tests/test_ai_assisted_detection_demo.py @@ -240,7 +240,7 @@ def test_parse_and_validate_rejects_forbidden_language_in_uncertainty_notes() -> assert any("uncertainty_notes" in error for error in exc_info.value.errors) -def test_prompt_injection_like_event_stays_in_untrusted_evidence() -> None: +def test_prompt_injection_like_event_stays_in_untrusted_evidence() -> None: _, output_schema, _, _, _, case_bundles = _demo_inputs() web_case = next( case_bundle @@ -254,11 +254,35 @@ def test_prompt_injection_like_event_stays_in_untrusted_evidence() -> None: assert "ignore all prior instructions" in evidence_text assert "ignore all prior instructions" not in system_text - assert envelope["evidence_payload"]["telemetry_classification"] == "untrusted_data" - assert any("untrusted evidence only" in item.lower() for item in web_case["evidence_highlights"]) - - -def test_malformed_attack_metadata_is_rejected_and_recorded(tmp_path) -> None: + assert envelope["evidence_payload"]["telemetry_classification"] == "untrusted_data" + assert any("untrusted evidence only" in item.lower() for item in web_case["evidence_highlights"]) + + +def test_default_case_report_includes_reviewer_run_summary(tmp_path) -> None: + result = run_demo(demo_root=default_demo_root(), artifacts_dir=tmp_path / "artifacts") + + assert result["raw_event_count"] == 15 + assert result["normalized_event_count"] == 15 + assert result["rule_hit_count"] == 5 + assert result["case_count"] == 3 + assert result["summary_count"] == 3 + assert result["rejected_summary_count"] == 0 + assert result["audit_record_count"] == 3 + + report_text = (tmp_path / "artifacts" / "case_report.md").read_text( + encoding="utf-8" + ) + assert "## Run Summary" in report_text + assert "- raw_events: 15" in report_text + assert "- normalized_events: 15" in report_text + assert "- rule_hits: 5" in report_text + assert "- cases: 3" in report_text + assert "- accepted_summaries: 3" in report_text + assert "- rejected_summaries: 0" in report_text + assert "- audit_records: 3" in report_text + + +def test_malformed_attack_metadata_is_rejected_and_recorded(tmp_path) -> None: demo_root = _copy_demo_root(tmp_path) rules_path = demo_root / "config" / "rules.yaml" rules_config = load_yaml(rules_path) @@ -356,7 +380,7 @@ def test_audit_traces_capture_accepted_and_rejected_paths(tmp_path) -> None: assert "Rejection reason: missing_required_fields" in report_text -def test_case_id_mismatch_is_rejected_and_not_counted_as_accepted(tmp_path) -> None: +def test_case_id_mismatch_is_rejected_and_not_counted_as_accepted(tmp_path) -> None: demo_root, _, _, _, _, _ = _demo_inputs() llm = ScriptedLlm( [ @@ -391,6 +415,14 @@ def test_case_id_mismatch_is_rejected_and_not_counted_as_accepted(tmp_path) -> N assert mismatch_record["raw_response_excerpt"] is not None report_text = (tmp_path / "artifacts" / "case_report.md").read_text(encoding="utf-8") - assert "## CASE-001" in report_text - assert "Summary status: rejected" in report_text - assert "Rejection reason: case_id_mismatch" in report_text + assert "## CASE-001" in report_text + assert "Summary status: rejected" in report_text + assert "Rejection reason: case_id_mismatch" in report_text + + +def test_run_demo_rejects_file_artifacts_dir(tmp_path) -> None: + artifacts_dir = tmp_path / "artifacts" + artifacts_dir.write_text("not a directory\n", encoding="utf-8") + + with pytest.raises(ValueError, match="Output directory path is not a directory"): + run_demo(demo_root=default_demo_root(), artifacts_dir=artifacts_dir) diff --git a/tests/test_cli_subprocess.py b/tests/test_cli_subprocess.py new file mode 100644 index 0000000..888f683 --- /dev/null +++ b/tests/test_cli_subprocess.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +import os +import subprocess +import sys +from pathlib import Path + +import yaml + +from telemetry_window_demo.io import load_config + + +def _cli_env(repo_root: Path) -> dict[str, str]: + env = os.environ.copy() + src_path = str(repo_root / "src") + existing_pythonpath = env.get("PYTHONPATH") + env["PYTHONPATH"] = ( + src_path + if not existing_pythonpath + else os.pathsep.join((src_path, existing_pythonpath)) + ) + return env + + +def test_readme_summarize_command_runs_as_module() -> None: + repo_root = Path(__file__).resolve().parents[1] + + result = subprocess.run( + [ + sys.executable, + "-m", + "telemetry_window_demo.cli", + "summarize", + "--input", + "data/raw/sample_events.jsonl", + ], + cwd=repo_root, + env=_cli_env(repo_root), + text=True, + capture_output=True, + timeout=30, + ) + + assert result.returncode == 0, result.stderr + assert "events: 41" in result.stdout + assert "overall_error_rate: 0.61" in result.stdout + + +def test_readme_default_run_command_writes_expected_artifacts(tmp_path) -> None: + repo_root = Path(__file__).resolve().parents[1] + config = load_config(repo_root / "configs" / "default.yaml") + output_dir = tmp_path / "processed" + config["input_path"] = str((repo_root / "data" / "raw" / "sample_events.jsonl").resolve()) + config["output_dir"] = str(output_dir.resolve()) + config_path = tmp_path / "default.yaml" + config_path.write_text(yaml.safe_dump(config, sort_keys=False), encoding="utf-8") + + result = subprocess.run( + [ + sys.executable, + "-m", + "telemetry_window_demo.cli", + "run", + "--config", + str(config_path), + ], + cwd=repo_root, + env=_cli_env(repo_root), + text=True, + capture_output=True, + timeout=30, + ) + + assert result.returncode == 0, result.stderr + assert "[OK] Loaded 41 events" in result.stdout + assert "[OK] Triggered 12 alerts" in result.stdout + assert (output_dir / "features.csv").is_file() + assert (output_dir / "alerts.csv").is_file() + assert (output_dir / "summary.json").is_file() + assert (output_dir / "event_count_timeline.png").is_file() + + +def test_plot_command_runs_as_module(tmp_path) -> None: + repo_root = Path(__file__).resolve().parents[1] + output_dir = tmp_path / "plots" + + result = subprocess.run( + [ + sys.executable, + "-m", + "telemetry_window_demo.cli", + "plot", + "--features", + "data/processed/features.csv", + "--alerts", + "data/processed/alerts.csv", + "--output-dir", + str(output_dir), + ], + cwd=repo_root, + env=_cli_env(repo_root), + text=True, + capture_output=True, + timeout=30, + ) + + assert result.returncode == 0, result.stderr + assert "[OK] Saved plots to" in result.stdout + assert (output_dir / "event_count_timeline.png").is_file() + assert (output_dir / "error_rate_timeline.png").is_file() + assert (output_dir / "alerts_timeline.png").is_file() diff --git a/tests/test_config_change_investigation_demo.py b/tests/test_config_change_investigation_demo.py index c097341..a2baf0c 100644 --- a/tests/test_config_change_investigation_demo.py +++ b/tests/test_config_change_investigation_demo.py @@ -210,6 +210,14 @@ def test_run_demo_reports_config_errors_before_loading_inputs(tmp_path) -> None: run_demo(demo_root=demo_root, artifacts_dir=tmp_path / "artifacts") +def test_run_demo_rejects_file_artifacts_dir(tmp_path) -> None: + artifacts_dir = tmp_path / "artifacts" + artifacts_dir.write_text("not a directory\n", encoding="utf-8") + + with pytest.raises(ValueError, match="Output directory path is not a directory"): + run_demo(demo_root=default_demo_root(), artifacts_dir=artifacts_dir) + + def test_run_demo_is_deterministic_and_matches_committed_artifacts(tmp_path) -> None: demo_root, _, _, _, _ = _load_demo_inputs() first_dir = tmp_path / "run-one" diff --git a/tests/test_rule_evaluation_and_dedup_demo.py b/tests/test_rule_evaluation_and_dedup_demo.py index 681bec8..15b318c 100644 --- a/tests/test_rule_evaluation_and_dedup_demo.py +++ b/tests/test_rule_evaluation_and_dedup_demo.py @@ -3,6 +3,8 @@ import json from pathlib import Path +import pytest + from telemetry_window_demo.rule_evaluation_and_dedup_demo import default_demo_root, run_demo from telemetry_window_demo.rule_evaluation_and_dedup_demo.pipeline import ( deduplicate_rule_hits, @@ -120,3 +122,11 @@ def test_run_demo_is_deterministic_and_matches_committed_artifacts(tmp_path) -> expected_report = (demo_root / "artifacts" / "dedup_report.md").read_text(encoding="utf-8") assert (first_dir / "dedup_report.md").read_text(encoding="utf-8") == expected_report assert (second_dir / "dedup_report.md").read_text(encoding="utf-8") == expected_report + + +def test_run_demo_rejects_file_artifacts_dir(tmp_path) -> None: + artifacts_dir = tmp_path / "artifacts" + artifacts_dir.write_text("not a directory\n", encoding="utf-8") + + with pytest.raises(ValueError, match="Output directory path is not a directory"): + run_demo(demo_root=default_demo_root(), artifacts_dir=artifacts_dir) diff --git a/tests/test_windowing.py b/tests/test_windowing.py index a50a1b5..47181c5 100644 --- a/tests/test_windowing.py +++ b/tests/test_windowing.py @@ -1,6 +1,7 @@ from __future__ import annotations import pandas as pd +import pytest from telemetry_window_demo.preprocess import normalize_events from telemetry_window_demo.windowing import build_windows @@ -90,3 +91,32 @@ def test_build_windows_handles_microsecond_backed_timestamps() -> None: assert windows[0].end_index == 3 assert windows[1].start_index == 1 assert windows[1].end_index == 3 + + +def test_build_windows_rejects_unsorted_timestamps() -> None: + events = pd.DataFrame( + [ + { + "timestamp": pd.Timestamp("2026-03-10T10:00:20Z"), + "event_type": "login_fail", + "source": "user_b", + "target": "auth", + "status": "fail", + }, + { + "timestamp": pd.Timestamp("2026-03-10T10:00:00Z"), + "event_type": "login_success", + "source": "user_a", + "target": "auth", + "status": "ok", + }, + ] + ) + + with pytest.raises(ValueError, match="sorted by timestamp"): + build_windows( + events, + timestamp_col="timestamp", + window_size_seconds=60, + step_size_seconds=10, + )