diff --git a/docs/AUTONOMY_TRAINING.md b/docs/AUTONOMY_TRAINING.md new file mode 100644 index 0000000..3063e4c --- /dev/null +++ b/docs/AUTONOMY_TRAINING.md @@ -0,0 +1,54 @@ +# MedSim Autonomy Training + +MedSim autonomy support is simulation-only. It is not live robot control, not +patient care, and not autonomous surgery. + +## Current Implementation + +- Internal `AutonomyEnv` with `reset`, `step`, `render`, and `close`. +- Observation schema includes tool pose, needle pose, gripper state, target + state, placeholder deformation/contact summaries, safety state, task phase, + previous command, and camera quality. +- Action schema includes tool deltas, rotation deltas, gripper command, + primitive command, no-op, and emergency stop. +- Safety shield rejects unknown tools, excessive command deltas, and emergency + stop commands before backend execution. +- Baseline policies: + - `scripted` + - `random` +- Reward output includes completion reward, collision penalty, unsafe command + penalty, and time penalty. + +## Commands + +```bash +python3 -m medsim.cli autonomy benchmark \ + --backend placeholder \ + --scenario configs/scenarios/normal.yaml \ + --episodes 10 +``` + +```bash +python3 -m medsim.cli autonomy evaluate-policy \ + --policy scripted \ + --scenario configs/scenarios/normal.yaml +``` + +```bash +python3 -m medsim.cli autonomy export-demos \ + --runs artifacts/runs \ + --out artifacts/datasets/demo_policy +``` + +## Artifacts + +Autonomy benchmark runs write: + +- `policy_rollout.json` +- `observations.jsonl` +- `actions.jsonl` +- `rewards.jsonl` +- `safety_events.jsonl` +- `autonomy_metrics.json` + +All outputs are non-patient simulation artifacts. diff --git a/docs/HOSPITAL_PILOT_READINESS.md b/docs/HOSPITAL_PILOT_READINESS.md new file mode 100644 index 0000000..50f4d47 --- /dev/null +++ b/docs/HOSPITAL_PILOT_READINESS.md @@ -0,0 +1,37 @@ +# Hospital Pilot Readiness + +MedSim hospital pilot mode is supervised, non-patient, and evaluation-only. 
+ +## Allowed Pilot Scope + +- Research, training, simulation, and phantom/bench evaluation. +- No patient care. +- No clinical decision support. +- No live autonomous surgery. +- Audit logging and artifact export required. +- Risk acknowledgement required for `hospital_pilot_non_patient` mode. + +## Metadata + +Run artifacts carry pilot metadata from `SceneConfig.pilot`: + +- `use_mode` +- `risk_acknowledged` +- `operator_id` +- `institution` +- `reviewer` +- `approved_protocol_id` +- `intended_use` + +## Checklist + +- Confirm non-patient protocol. +- Record operator and reviewer. +- Record approved protocol ID where applicable. +- Export validation bundle. +- Review cybersecurity settings. +- Review fidelity disclosure. + +Clinical use, patient-care decisions, or autonomous real-world actuation require +formal validation, domain expert review, QMS controls, cybersecurity review, and +regulatory clearance where applicable. diff --git a/docs/PHANTOM_VALIDATION_PROTOCOL.md b/docs/PHANTOM_VALIDATION_PROTOCOL.md new file mode 100644 index 0000000..629b3a0 --- /dev/null +++ b/docs/PHANTOM_VALIDATION_PROTOCOL.md @@ -0,0 +1,29 @@ +# Phantom Validation Protocol + +MedSim includes a scaffold for comparing simulation runs against bench/phantom +trial data. The current fixture is synthetic and validates the pipeline only. + +## Bench Metadata + +Bench data must identify phantom material, geometry, camera/tracker system, +calibration metadata, operator, instrument setup, trial ID, task type, recorded +trajectory, target points, deformation proxy, contact timing if available, notes, +and data quality flags. 
+ +## Metrics + +The comparison pipeline reports: + +- trajectory RMSE +- endpoint error +- needle pose error +- contact timing error when available +- deformation proxy error +- task success agreement +- safety event agreement when available + +## Claim Boundary + +`phantom_validated` fidelity is blocked unless real phantom comparison artifacts +exist and satisfy externally reviewed acceptance criteria. Synthetic fixtures do +not permit phantom-validated claims. diff --git a/docs/PRODUCTION_BUILD_LOG.md b/docs/PRODUCTION_BUILD_LOG.md new file mode 100644 index 0000000..3eb08af --- /dev/null +++ b/docs/PRODUCTION_BUILD_LOG.md @@ -0,0 +1,650 @@ +# MedSim Production Build Log + +Date: 2026-05-14 +Repo root: `/Users/mihirmodi/Documents/Simular/medsim` +Branch target from goal: `feat/workbench-visual-upgrade` + +## Build Objective + +Build MedSim into a production-grade surgical robotics simulation, synthetic-data, +autonomy-training, validation, and hospital-pilot evaluation platform while +preserving honest claim boundaries: + +- No clinical, patient-care, FDA-cleared, or autonomous live-surgery claims. +- Placeholder backend remains deterministic infrastructure validation only. +- SOFA must be reported as unavailable unless real local SOFA execution works. +- Clinical/hospital/autonomous readiness requires external validation, QMS, + cybersecurity, domain-expert, and regulatory review. + +## Baseline Repository State + +Initial status observed before implementation work: + +```text + M docs/workbench.md + M frontend/src/components/InspectPanel.tsx + M frontend/src/components/RunPanel.tsx + M frontend/src/components/viewer/assetSceneConfig.ts + M frontend/src/styles.css +?? goal.md +``` + +`docs/PRODUCTION_BUILD_LOG.md` did not exist and was created as the first +checkpoint artifact. + +Pre-existing modified files are treated as user/worktree changes and will not +be reverted. 
+ +## Baseline Commands + +Baseline run results: + +- `python -m pytest -q`: failed to start because `python` is not on PATH. +- `python3 -m pytest -q`: passed, `51 passed, 1 skipped in 0.71s`. +- `ruff check .`: failed to start because `ruff` is not on PATH. +- `python3 -m ruff check .`: passed, `All checks passed!`. +- mypy/pyright: not configured in `pyproject.toml` or frontend package scripts. +- `npm test`: not configured in `frontend/package.json`. +- `npm run build`: passed. Vite emitted a large chunk warning for the main JS + bundle but build completed successfully. +- Project-specific CLI/API checks: pending after audit. + +## Current Backend Capability Snapshot + +Audit notes before Phase 1 changes: + +- `src/medsim/sim/backend.py` defines a smaller `SimulationBackend`, + `BackendInfo`, `BackendCapability`, and `CapabilityStatus` contract. +- `PlaceholderBackend` runs and reports deterministic reset/action replay, + structured state/observation/action export, recorder compatibility, and + taxonomy support. It honestly reports contact, deformable state, force, image, + depth, and segmentation support as unsupported. +- `SofaBackend` imports safely and exposes an honest skeleton. Without SOFA it + reports no deterministic reset/replay and `initialize()` raises + `SofaDependencyError`. Its current support is scene-plan/runtime metadata only, + not physics stepping. +- FastAPI currently exposes workbench endpoints under `/api/*`, including + `/api/health`, `/api/metadata`, prompt compile, scene preview, run/session + state, events, artifacts, advance/cancel, and SOFA preview. It does not yet + expose production-style `/api/backends` health/capability endpoints. +- CLI currently supports `run-scene`, `generate-dataset`, `eval`, + `replay-validate`, and `check-backend-contract`. It does not yet support + backend listing or backend health commands. 
+- Recorder/export writes the earlier artifact layout: + `run_manifest.json`, `config_snapshot.json`, `aggregate_metrics.json`, + `replay_validation.json`, and episode JSONL/summary files under `episodes/`. + It does not yet write the full Phase 5 audit-grade layout. + +## Chosen Architecture Plan + +Proceed checkpoint-by-checkpoint, beginning with Phase 1: + +1. Audit current backend/API/config/data/test shape. +2. Add or upgrade backend protocol, capabilities, registry, health, and errors + without breaking placeholder execution. +3. Add graceful SOFA preflight/reporting and tests for missing dependencies. +4. Preserve placeholder determinism and strengthen artifacts/provenance. +5. Continue through scenario validation, dataset/export, replay/eval, autonomy, + validation, QMS, API, frontend, docs, and demo pipeline with tests after + major checkpoints. + +## Checkpoint Progress + +- [x] Read `goal.md`. +- [x] Created `docs/PRODUCTION_BUILD_LOG.md`. +- [ ] Completed repository audit. +- [x] Ran baseline checks. +- [x] Implemented Phase 1 production backend abstraction. +- [ ] Implemented Phase 2 placeholder backend hardening. +- [x] Implemented Phase 3 SOFA backend path. +- [ ] Implemented Phase 4 physics/scenario config system. +- [x] Implemented Phase 5 trace/recorder/dataset/provenance system. +- [x] Implemented Phase 6 replay/evaluation engine. +- [x] Implemented Phase 7 autonomy training infrastructure. +- [x] Implemented Phase 8 safe robot bench integration architecture. +- [x] Implemented Phase 9 phantom/bench validation workflow. +- [x] Implemented Phase 10 FastAPI production API. +- [ ] Implemented Phase 11 frontend production workbench. +- [x] Implemented Phase 12 hospital pilot mode. +- [x] Implemented Phase 13 validation bundle export. +- [ ] Implemented later phases. +- [ ] Ran final acceptance audit. + +## Command Log + +| Time | Command | Result | Notes | +| --- | --- | --- | --- | +| 2026-05-14 | `pwd` | pass | Confirmed repo root. 
| +| 2026-05-14 | `sed -n '1,240p' goal.md` | pass | Read first part of build instructions. | +| 2026-05-14 | `sed -n '241,520p' goal.md` | pass | Read phases 3-7. | +| 2026-05-14 | `sed -n '521,900p' goal.md` | pass | Read phases 7-14. | +| 2026-05-14 | `sed -n '901,1360p' goal.md` | pass | Read phases 14-20 and final response requirements. | +| 2026-05-14 | `git status --short` | pass | Found pre-existing modifications listed above. | +| 2026-05-14 | `python -m pytest -q` | fail | `python` executable not found in shell. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `51 passed, 1 skipped in 0.71s`. | +| 2026-05-14 | `ruff check .` | fail | `ruff` executable not found in shell. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Ruff module is available through Python. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite warned that main chunk exceeds 500 kB. | +| 2026-05-14 | `python3 --version` | pass | Python 3.13.2. | +| 2026-05-14 | `python3 scripts/run_scene.py --scene configs/base_scene.yaml --scenario configs/scenarios/normal.yaml --seed 1` | pass | Wrote `artifacts/runs/run_20260514T204241815012Z`; normal scenario succeeded in 13 steps. | +| 2026-05-14 | `python3 scripts/check_backend_contract.py --backend placeholder --scene configs/base_scene.yaml --scenarios configs/scenarios/normal.yaml configs/scenarios/camera_occlusion.yaml` | pass | Placeholder backend contract passed; report under `artifacts/compliance/placeholder_20260514T204241877337Z/`. | +| 2026-05-14 | `python3 scripts/check_backend_contract.py --backend sofa --scene configs/base_scene.yaml --scenarios configs/scenarios/normal.yaml` | expected fail | Honest SOFA skeleton failed `lifecycle_runnable`; report under `artifacts/compliance/sofa_20260514T204241815855Z/`. 
| +| 2026-05-14 | `python3 -m pytest tests/test_backend_registry.py tests/test_backend_capabilities.py tests/test_sofa_adapter_skeleton.py tests/test_workbench_api.py -q` | pass | `25 passed, 1 skipped`; verified registry, health/capability API, and missing-SOFA behavior. | +| 2026-05-14 | `python3 -m medsim.cli backends list` | pass | Listed placeholder and SOFA capability payloads. | +| 2026-05-14 | `python3 -m medsim.cli backends health sofa` | pass | Reported SOFA `unavailable` with install hint and verification command. | +| 2026-05-14 | `python3 -m ruff check .` | fail then pass | First run caught import sorting and two line-length issues; fixed and reran successfully. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `56 passed, 1 skipped in 0.95s`. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; same Vite large chunk warning remains. | +| 2026-05-14 | `python3 -m pytest tests/test_manifest_and_taxonomy.py tests/test_replay_validation.py tests/test_workbench_api.py -q` | pass | `15 passed`; verified placeholder artifact claim-boundary metadata and replay result metadata. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `57 passed, 1 skipped in 0.86s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after artifact metadata updates. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | +| 2026-05-14 | `python3 scripts/run_scene.py --scene configs/base_scene.yaml --scenario configs/scenarios/tool_collision.yaml --seed 2` | pass | Wrote a placeholder collision run with synthetic contact records; outcome was expected failure after collision limit. | +| 2026-05-14 | `python3 -m pytest tests/test_manifest_and_taxonomy.py tests/test_replay_validation.py -q` | pass | `6 passed`; verified synthetic contact/deformation labels and replay metadata. 
| +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after placeholder contact/deformation fields. | +| 2026-05-14 | `python3 -m pytest tests/test_sofa_adapter_skeleton.py tests/test_workbench_api.py -q` | pass | `21 passed, 1 skipped`; verified SOFA preflight, compatibility modules, material placeholder, and API endpoint. | +| 2026-05-14 | `python3 -m medsim.cli sofa preflight --scene configs/base_scene.yaml --scenario configs/scenarios/normal.yaml --attempt-runtime` | pass | Reported `available: false`, `scene_plan_valid: true`, `runtime_build_succeeded: false`, install hint, verification command, and no fake SOFA run. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `60 passed, 1 skipped in 1.05s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after SOFA preflight additions. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | +| 2026-05-14 | `python3 -m pytest tests/test_scenario_validation.py tests/test_config_loading.py tests/test_workbench_api.py -q` | pass | `17 passed`; verified production schema defaults, validation reports, unsafe claim blocking, and API validation endpoint. | +| 2026-05-14 | `python3 -m medsim.cli validate-config configs/base_scene.yaml` | pass | Reported valid scene config with `placeholder_deterministic` fidelity. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `65 passed, 1 skipped in 1.00s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after Phase 4 validation additions. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | +| 2026-05-14 | `python3 -m pytest tests/test_artifact_bundle_and_dataset.py tests/test_replay_validation.py tests/test_workbench_api.py -q` | pass | `15 passed`; verified run-level artifact bundle and dataset export/validation. 
| +| 2026-05-14 | `python3 -m medsim.cli generate-dataset --scene configs/base_scene.yaml --scenarios configs/scenarios/normal.yaml configs/scenarios/camera_occlusion.yaml --episodes 2 --seed 120` | pass | Wrote `artifacts/runs/run_20260514T210701411797Z` with Phase 5 bundle. | +| 2026-05-14 | `python3 -m medsim.cli dataset-index artifacts/runs` | pass | Indexed 42 local run directories; newest run has `run_summary.json`. | +| 2026-05-14 | `python3 -m medsim.cli export-dataset --runs artifacts/runs --out artifacts/datasets/phase5_smoke` | pass | Wrote generic JSONL, imitation-learning JSONL, RL transitions JSONL, metadata CSV, and manifest JSON. | +| 2026-05-14 | `python3 -m medsim.cli validate-dataset artifacts/datasets/phase5_smoke` | pass after sequential rerun | Initial parallel validation raced export and failed before files existed; rerun passed with no missing/empty files. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `67 passed, 1 skipped in 1.51s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after Phase 5 additions. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | +| 2026-05-14 | `python3 -m pytest tests/test_eval_runner.py tests/test_artifact_bundle_and_dataset.py tests/test_replay_validation.py -q` | pass | `7 passed`; verified eval report files, run comparison, and placeholder benchmark helper. | +| 2026-05-14 | `python3 -m medsim.cli run-eval --runs artifacts/runs/run_20260514T210701411797Z --out artifacts/eval/phase6_smoke` | pass | Wrote `eval_summary.json`, `eval_report.md`, `runs.csv`, `failures.json`, and `metric_distributions.json`. | +| 2026-05-14 | `python3 -m medsim.cli benchmark --scenario configs/scenarios/normal.yaml --backend placeholder --episodes 2` | pass | Wrote placeholder benchmark run and eval report; replay pass rate 1.000. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `70 passed, 1 skipped in 1.84s`. 
| +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after Phase 6 additions. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | +| 2026-05-14 | `python3 -m pytest tests/test_autonomy.py tests/test_eval_runner.py -q` | pass | `6 passed`; verified safety shield, autonomy benchmark artifacts, and demo export. | +| 2026-05-14 | `python3 -m medsim.cli autonomy benchmark --backend placeholder --scenario configs/scenarios/normal.yaml --episodes 2 --out artifacts/runs/autonomy_phase7_smoke` | pass | Wrote autonomy rollout artifacts and metrics with `simulation_only: true`. | +| 2026-05-14 | `python3 -m medsim.cli autonomy export-demos --runs artifacts/runs --out artifacts/datasets/demo_policy_phase7` | pass | Exported 82 action/observation demo rows from available autonomy runs. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `73 passed, 1 skipped in 1.82s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after Phase 7 additions. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | +| 2026-05-14 | `python3 -m pytest tests/test_robotics_dry_run.py -q` | pass | `3 passed`; verified dry-run robot preview, no actuation, safety rejection, and calibration defaults. | +| 2026-05-14 | `python3 -m pytest tests/test_robotics_dry_run.py tests/test_phantom_validation.py -q` | pass | `4 passed`; verified dry-run robotics and synthetic phantom comparison report. | +| 2026-05-14 | `python3 -m medsim.cli phantom validate-fixture --out artifacts/validation/phase9_fixture` | pass | Wrote synthetic bench fixture and kept `phantom_validated_claim_allowed: false`. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `77 passed, 1 skipped in 1.88s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after Phase 8/9 additions. 
| +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | +| 2026-05-14 | `python3 -m pytest tests/test_workbench_api.py -q` | pass | `13 passed`; verified production API additions, path traversal rejection, and optional API-key auth. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `79 passed, 1 skipped in 2.05s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after production API/security additions. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Frontend warnings/disclosures compiled successfully; Vite large chunk warning remains. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `79 passed, 1 skipped in 2.08s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Backend lint remained clean after frontend work. | +| 2026-05-14 | `python3 -m pytest tests/test_manifest_and_taxonomy.py tests/test_scenario_validation.py -q` | pass | `9 passed`; verified pilot metadata in artifacts and hospital pilot risk acknowledgement gate. | +| 2026-05-14 | `python3 -m pytest tests/test_validation_bundle.py tests/test_manifest_and_taxonomy.py tests/test_workbench_api.py -q` | pass | `18 passed`; verified validation bundle export and API remained healthy. | +| 2026-05-14 | `python3 -m pytest -q` | pass | `81 passed, 1 skipped in 2.10s`. | +| 2026-05-14 | `python3 -m ruff check .` | pass | Lint clean after Phase 12/13 additions. | +| 2026-05-14 | `npm run build` from `frontend/` | pass | Production build completed; Vite large chunk warning remains. | + +## Completed Checkpoints + +### Phase 1 - Production Backend Abstraction + +Implemented: + +- `src/medsim/sim/capabilities.py` with `BackendCapabilities`, + `BackendHealth`, health status, fidelity levels, and honest placeholder/SOFA + capability builders. +- `src/medsim/sim/errors.py` backend error hierarchy. 
+- `src/medsim/sim/base.py` runtime-checkable `BackendProtocol`. +- `src/medsim/sim/backend_registry.py` default registry for `placeholder` and + `sofa`. +- Default `SimulationBackend.capabilities()`, `health()`, + `validate_scenario()`, and `replay_metadata()` helpers. +- Placeholder-specific capability/health reporting with + `placeholder_deterministic` fidelity. +- SOFA-specific capability/health reporting with + `sofa_unavailable_adapter_only` when missing and degraded + `sofa_minimal_unvalidated` when importable. +- CLI backend inspection commands: + - `python3 -m medsim.cli backends list` + - `python3 -m medsim.cli backends health <name>` + - `python3 -m medsim.cli backends capabilities <name>` +- FastAPI production-style endpoints: + - `GET /health` and `GET /api/health` + - `GET /version` and `GET /api/version` + - `GET /dependencies` and `GET /api/dependencies` + - `GET /backends` and `GET /api/backends` + - `GET /backends/{name}/health` and `/api/backends/{name}/health` + - `GET /backends/{name}/capabilities` and + `/api/backends/{name}/capabilities` +- Tests for registry listing, unknown backend errors, placeholder health, + graceful missing-SOFA health, and API endpoint behavior. + +Acceptance evidence: + +- Placeholder backend still runs and passes backend compliance. +- Registry lists placeholder and SOFA. +- Missing SOFA does not break imports, CLI listing, API metadata, or tests. +- Health/capability endpoints and CLI commands work. +- Tests cover registry, health, missing dependencies, and placeholder execution + through existing compliance/API suites. + +### Phase 2 - Placeholder Artifact Claim-Boundary Hardening (Partial) + +Implemented: + +- `src/medsim/data/claims.py` centralizes artifact claim-boundary metadata. 
+- `StepRecord`, `EpisodeSummary`, `RunManifest`, and + `ReplayValidationResult` now include: + - `backend` + - `backend_version` + - `fidelity_level` + - `physical_accuracy` + - `intended_use` + - `disallowed_uses` +- Placeholder runs write `backend: placeholder`, + `fidelity_level: placeholder_deterministic`, `physical_accuracy: false`, + and `intended_use: infrastructure_validation` into run manifests, config + snapshots, aggregate metrics, episode summaries, JSONL step records, and + replay validation results. +- Step records now include `contacts` and `deformation_summary` fields. These + are explicitly labeled `placeholder_synthetic`, `physical_accuracy: false`, + with null physical forces and limitations explaining that no tissue/contact + mechanics are modeled. +- Existing deterministic replay remains functional. +- Workbench-created placeholder runs pass the same metadata into the recorder. + +Evidence: + +- New artifact completeness assertions in `tests/test_manifest_and_taxonomy.py`. +- Replay claim-boundary assertions in `tests/test_replay_validation.py`. +- Full pytest/ruff/frontend build passed after the change. + +Still open in Phase 2 / deferred to Phase 7: + +- Add baseline autonomy benchmark under the dedicated autonomy phase. + Scripted policy rollout already exists through `NeedlePassingTask.scripted_action` + and `SurgicalEnv.run_episode()`, but it is not yet exposed as the Phase 7 + autonomy benchmark interface. + +### Phase 3 - Real SOFA Backend Path + +Implemented: + +- `src/medsim/sim/sofa_preflight.py` with structured preflight reports for + dependency availability, backend health, capabilities, scene-plan validity, + canonical scene components, optional runtime-build attempt, install hint, and + verification command. +- `src/medsim/sim/sofa_scene_builder.py` compatibility exports for scene + planning/building. +- `src/medsim/sim/sofa_extractors.py` compatibility exports for state extraction + and conversion. 
+- `src/medsim/sim/sofa_materials.py` unvalidated default material parameter + model that explicitly sets `physical_accuracy: false`. +- CLI command: + `python3 -m medsim.cli sofa preflight --scene ... --scenario ...` +- API endpoints: + - `GET /api/sofa/preflight` + - `GET /sofa/preflight` +- `docs/SOFA_BACKEND.md` with current status, verification commands, install + hint, and claim boundary. + +Local status: + +- SOFA is not installed/importable in this environment. +- Preflight validates MedSim scene-plan mapping but reports SOFA runtime + unavailable. +- `--attempt-runtime` does not fake success; it reports + `runtime_build_succeeded: false` when SOFA is absent. + +Acceptance evidence: + +- App/API does not crash without SOFA. +- CLI/API clearly report SOFA unavailable with install guidance. +- Canonical scene components are represented in the scene plan. +- Existing optional SOFA runtime integration test remains skipped unless SOFA is + installed. +- Documentation explains what is implemented and what remains. + +### Phase 4 - Physics and Scenario Config System (Partial) + +Implemented: + +- Extended `SceneConfig`/`ScenarioConfig` with backward-compatible production + fields and defaults for: + - tissue geometry + - tissue/material properties + - solver params + - collision/contact params + - needle geometry/material params + - laparoscopic tool control params + - lighting/visual params + - domain randomization metadata + - success criteria + - safety boundaries + - autonomy reward config + - dataset split metadata + - provenance metadata +- Added schema/export modules: + - `src/medsim/config/schema.py` + - `src/medsim/sim/materials.py` + - `src/medsim/sim/contact.py` + - `src/medsim/sim/instruments.py` + - `src/medsim/sim/tasks.py` + - `src/medsim/sim/scenario_validation.py` +- Added `medsim validate-config <config_path>` CLI command. 
+- Added API endpoints: + - `POST /api/scenarios/validate` + - `POST /scenarios/validate` +- Validation catches invalid ranges, unknown schema versions, backend mismatch, + unavailable SOFA warning, prohibited patient-care use modes, blocked + clinically validated fidelity claims, and phantom-validated claims without + phantom artifacts. + +Evidence: + +- Existing configs still load with production defaults. +- Invalid material ranges fail clearly. +- Unsafe patient-care/clinical validation claims fail clearly. +- Full pytest/ruff/frontend build passed. + +Still open in Phase 4 / deferred to frontend workbench phase: + +- Add visible frontend validation feedback around the scenario builder/editor. + +### Phase 5 - Trace, Recorder, Dataset, Provenance System + +Implemented: + +- `src/medsim/data/artifacts.py` writes a run-level artifact bundle: + - `run_summary.json` + - `scenario_resolved.json` + - `backend_capabilities.json` + - `environment.json` + - `provenance.json` + - `metrics.json` + - `validation.json` + - `replay_validation.json` + - `states.jsonl` + - `events.jsonl` + - `commands.jsonl` + - `contacts.jsonl` + - `observations.jsonl` + - `actions.jsonl` + - `rewards.jsonl` + - `artifact_manifest.json` + - `checksums.json` +- CLI and workbench placeholder runs now write the Phase 5 bundle in addition + to existing episode artifacts. +- `src/medsim/data/dataset.py` implements: + - dataset indexing + - dataset summary + - generic JSONL export + - imitation-learning JSONL export + - RL transition JSONL export + - metadata CSV export + - manifest JSON export + - dataset validation +- CLI commands: + - `medsim dataset-index <runs_dir>` + - `medsim dataset-summary <path>` + - `medsim export-dataset --runs <runs> --out <out_dir>` + - `medsim validate-dataset <dataset_dir>` + +Evidence: + +- New tests in `tests/test_artifact_bundle_and_dataset.py`. +- Smoke dataset exported to `artifacts/datasets/phase5_smoke`. +- Full pytest/ruff/frontend build passed. 
+ +Known limitations: + +- Phase 5 bundle streams are derived from placeholder episode JSONL records. + They are complete infrastructure artifacts, not physics-derived measurements. + SOFA runs will need to populate the same streams from real runtime state when + SOFA stepping exists. + +### Phase 6 - Replay and Evaluation Engine + +Implemented: + +- `src/medsim/eval/runner.py` for eval artifact generation and run comparison. +- CLI commands: + - `medsim replay-validate ...` existed and remains working. + - `medsim run-eval --runs <runs> --out <out_dir>` + - `medsim compare-runs <run_dirs...>` + - `medsim benchmark --scenario <scenario> --backend placeholder --episodes <n>` + - `medsim benchmark --scenario <scenario> --backend sofa --episodes <n>` reports + SOFA preflight/unavailable instead of faking a benchmark. +- Eval output includes: + - `eval_summary.json` + - `eval_report.md` + - `runs.csv` + - `failures.json` + - `metric_distributions.json` +- Placeholder benchmark generates a run, replay-validates it, and writes an eval + report. + +Evidence: + +- New tests in `tests/test_eval_runner.py`. +- CLI smoke eval under `artifacts/eval/phase6_smoke`. +- CLI placeholder benchmark under `artifacts/eval/benchmark_*`. +- Full pytest/ruff/frontend build passed. + +Known limitations: + +- Metric distributions are summary-level and placeholder-derived. Contact force, + deformation, image, depth, and physics drift metrics remain unavailable until + a real physics backend emits those channels. + +### Phase 7 - Autonomy Training Infrastructure + +Implemented: + +- `src/medsim/autonomy/` package: + - `schemas.py` + - `env.py` + - `policies.py` + - `rewards.py` + - `safety.py` + - `benchmark.py` + - `export.py` +- Internal `AutonomyEnv` with `reset`, `step`, `render`, and `close`. +- Observation/action/reward/safety schemas. +- Safety shield for unknown tools, excessive command deltas, and emergency stop. 
+- Baseline policies: + - `scripted` + - `random` +- Autonomy benchmark artifacts: + - `policy_rollout.json` + - `observations.jsonl` + - `actions.jsonl` + - `rewards.jsonl` + - `safety_events.jsonl` + - `autonomy_metrics.json` +- CLI commands: + - `medsim autonomy benchmark --backend placeholder --scenario <scenario> --episodes <n>` + - `medsim autonomy export-demos --runs <runs> --out <out_dir>` + - `medsim autonomy evaluate-policy --policy scripted --scenario <scenario>` +- `docs/AUTONOMY_TRAINING.md` + +Evidence: + +- New tests in `tests/test_autonomy.py`. +- CLI autonomy benchmark wrote `artifacts/runs/autonomy_phase7_smoke`. +- CLI demo export wrote `artifacts/datasets/demo_policy_phase7`. +- Full pytest/ruff/frontend build passed. + +Claim boundary: + +- Autonomy outputs are simulation-only and set `no_patient_use: true`. +- SOFA autonomy benchmark reports preflight/unavailable instead of faking policy + execution when SOFA is missing. + +### Phase 8 - Safe Robot Bench Integration Architecture + +Implemented: + +- `src/medsim/robotics/` package: + - `interface.py` + - `dry_run.py` + - `safety.py` + - `calibration.py` +- Dry-run command preview only; `would_actuate: false` always. +- Safety limits for max delta, max rotation, emergency stop, disabled hardware, + and `NO PATIENT USE` metadata. +- Calibration requirement schema. +- `docs/ROBOT_BENCH_INTEGRATION.md` + +Evidence: + +- `tests/test_robotics_dry_run.py` +- Full pytest/ruff/frontend build passed. + +### Phase 9 - Phantom / Bench Validation Workflow + +Implemented: + +- `src/medsim/validation/` package: + - `schemas.py` + - `phantom.py` + - `compare.py` +- Bench metadata schema with phantom material, geometry, tracker system, + calibration, operator, instrument setup, trial ID, task type, trajectory, + targets, deformation proxy, contact timing, notes, and quality flags. 
+- Sim-vs-bench comparison metrics: + - trajectory RMSE + - endpoint error + - needle pose error + - contact timing error (placeholder) + - deformation proxy error + - task success agreement + - safety event agreement (placeholder) +- CLI commands: + - `medsim phantom compare --sim-run RUN_DIR --bench-data BENCH_DATA --out OUT_DIR` + - `medsim phantom validate-fixture --out OUT_DIR` +- `docs/PHANTOM_VALIDATION_PROTOCOL.md` + +Evidence: + +- `tests/test_phantom_validation.py` +- Synthetic fixture validation smoke under `artifacts/validation/phase9_fixture`. +- `phantom_validated_claim_allowed` remains false for synthetic fixtures. + +### Phase 10 - FastAPI Production API + +Implemented: + +- Environment-driven CORS via `MEDSIM_CORS_ORIGINS`. +- Optional API key middleware via `MEDSIM_API_KEY`; local dev remains unauthenticated + when no API key is configured. +- Production-style endpoints added around the existing workbench API: + - `GET /health`, `/version`, `/dependencies`, `/backends` + - `GET /backends/{name}/health` + - `GET /backends/{name}/capabilities` + - `POST /scenarios/validate` + - `GET /runs/{run_id}/metrics` + - `GET /runs/{run_id}/states` + - `POST /eval` + - `POST /dataset/index` + - `POST /dataset/export` + - `POST /autonomy/benchmark` + - `POST /autonomy/export` + - `POST /validation/phantom/compare` +- Artifact path escape checks for API-triggered artifact operations. + +Evidence: + +- `tests/test_workbench_api.py` covers endpoint behavior, path traversal + rejection, and optional auth. +- Full pytest/ruff/frontend build passed. + +### Phase 15 - Security Hardening (Partial) + +Implemented so far: + +- Artifact path traversal prevention for API artifact operations. +- CORS environment config. +- Optional API key auth. +- Dependency/version endpoints. +- Safe missing-backend errors through registry/API. + +Still open: + +- Request size limit enforcement from `MEDSIM_MAX_CONFIG_BYTES`. +- Run timeout enforcement from `MEDSIM_RUN_TIMEOUT_SECONDS`.
+- Structured audit logging and no-secret log tests. +- Deployment security docs/SBOM notes. + +### Phase 11 - Frontend Production Workbench (Partial) + +Implemented: + +- Added mandatory visible warnings/disclosures in the workbench: + - Placeholder backend is deterministic infrastructure validation only. + - SOFA unavailable warning. + - Simulation/phantom autonomy training only; no live patient use. + - Supervised non-patient pilot mode only; not for patient-care decisions or + autonomous surgery. + +Evidence: + +- Frontend production build passed after changes. + +Still open: + +- Full production workbench controls for validation bundles, autonomy benchmark + launch, richer backend capability badges, and hospital pilot metadata editing. + +### Phase 12 - Hospital Pilot Mode + +Implemented: + +- `PilotMetadataConfig` on `SceneConfig`. +- Run manifests include use mode, risk acknowledgement, operator, institution, + reviewer, and approved protocol ID. +- Run summaries include pilot metadata. +- Config validation blocks `prohibited_clinical_use`. +- Config validation requires `risk_acknowledged: true` for + `hospital_pilot_non_patient`. +- UI disclosure warns that pilot mode is supervised and non-patient only. +- `docs/HOSPITAL_PILOT_READINESS.md` + +Evidence: + +- `tests/test_manifest_and_taxonomy.py` +- `tests/test_scenario_validation.py` +- Full pytest/ruff/frontend build passed. + +### Phase 13 - Validation Bundle Export + +Implemented: + +- `src/medsim/validation/bundle.py` +- CLI command: + `medsim export-validation-bundle --run RUN_DIR --out OUT_DIR` +- API endpoint: + `POST /api/validation/bundle/export` +- Bundle copies required run files, writes risk/fidelity disclosures, pilot + metadata, manifest, missing-file report, and markdown summary. + +Evidence: + +- `tests/test_validation_bundle.py` +- Full pytest/ruff/frontend build passed. + +## Known Gaps + +- SOFA local availability was tested through backend health and preflight: + SOFA is currently unavailable in this environment.
+- Phase 2 is mostly complete for placeholder honesty/artifacts. The remaining + autonomy benchmark belongs to the larger Phase 7 autonomy interface. +- Artifact contract, dataset export, autonomy, validation bundle, QMS, security, + deployment, and final demo pipeline remain unverified. diff --git a/docs/ROBOT_BENCH_INTEGRATION.md b/docs/ROBOT_BENCH_INTEGRATION.md new file mode 100644 index 0000000..b8d3c7a --- /dev/null +++ b/docs/ROBOT_BENCH_INTEGRATION.md @@ -0,0 +1,23 @@ +# Robot Bench Integration + +MedSim includes only a dry-run robotics abstraction. Hardware adapters are +disabled by default and no real robot actuation code is included. + +## Current Scope + +- Preview a command. +- Enforce command delta and rotation limits. +- Preserve emergency-stop state as a no-actuation condition. +- Require `NO PATIENT USE` metadata. +- Define calibration evidence expectations for future bench/phantom adapters. + +## Not Implemented + +- Real robot drivers. +- Network or serial hardware control. +- Live autonomous actuation. +- Patient-care use. + +Any future hardware adapter must be explicitly enabled, separately reviewed, and +limited to supervised bench/phantom use with calibration and emergency-stop +evidence. diff --git a/docs/SOFA_BACKEND.md b/docs/SOFA_BACKEND.md new file mode 100644 index 0000000..d59029b --- /dev/null +++ b/docs/SOFA_BACKEND.md @@ -0,0 +1,63 @@ +# MedSim SOFA Backend + +MedSim includes a dependency-gated SOFA adapter path. It is intentionally honest: +the adapter can build MedSim-owned scene plans without SOFA installed, and it +can report dependency health, but it does not claim physics execution unless a +real local SOFA runtime is importable and the requested runtime operation +succeeds. + +## Current Status + +- Placeholder backend remains the runnable deterministic backend. +- SOFA imports are lazy and optional. +- `SofaBackend.backend_info()`, backend registry health, and preflight reports + work without SOFA installed. 
+- `build_sofa_scene_plan(scene_config, scenario)` maps the current needle-passing + scene into canonical SOFA planning components: tissue, tools, needle, camera, + targets, scenario perturbations, and expected state fields. +- If SOFA is installed and exposes `Sofa.Core.Node`, `SofaSceneBuilder` can build + a minimal root-node scene with metadata child nodes. +- `SimulationBackend.step()` for SOFA is not implemented. Recorder-compatible + SOFA episodes, real contacts, FEM deformation, rendering, and deterministic + replay are not implemented. + +## Verification + +Run: + +```bash +python3 -m medsim.cli sofa preflight \ + --scene configs/base_scene.yaml \ + --scenario configs/scenarios/normal.yaml +``` + +Optional minimal runtime build attempt: + +```bash +python3 -m medsim.cli sofa preflight \ + --scene configs/base_scene.yaml \ + --scenario configs/scenarios/normal.yaml \ + --attempt-runtime +``` + +When SOFA is missing, the expected status is `available: false` with +`fidelity_level: sofa_unavailable_adapter_only`. That is not a failure of the +placeholder backend. + +## Install Hint + +Install SOFA and SofaPython3 in the active Python environment, then verify: + +```bash +python3 -c "import importlib; print(importlib.import_module('Sofa').__name__)" +``` + +SOFA packaging varies by platform, so MedSim does not pin SOFA as a required +dependency. + +## Claim Boundary + +Do not describe the SOFA adapter as high-fidelity physics, FEM simulation, +phantom validated, or clinically validated until real executable SOFA scenes, +physics-derived state extraction, recorder-compatible artifacts, and validation +evidence exist. diff --git a/docs/qms/CHANGE_CONTROL.md b/docs/qms/CHANGE_CONTROL.md new file mode 100644 index 0000000..c1e7d55 --- /dev/null +++ b/docs/qms/CHANGE_CONTROL.md @@ -0,0 +1,4 @@ +# Change Control + +Changes require scoped review, tests, build verification, build log updates, and +claim-boundary review before release. 
diff --git a/docs/qms/CLINICAL_VALIDATION_PLAN.md b/docs/qms/CLINICAL_VALIDATION_PLAN.md new file mode 100644 index 0000000..39620ff --- /dev/null +++ b/docs/qms/CLINICAL_VALIDATION_PLAN.md @@ -0,0 +1,4 @@ +# Clinical Validation Plan + +No clinical validation is complete. Clinical claims require protocol approval, +domain expert review, real-world evidence, and regulatory strategy. diff --git a/docs/qms/CYBERSECURITY_PLAN.md b/docs/qms/CYBERSECURITY_PLAN.md new file mode 100644 index 0000000..40d4c17 --- /dev/null +++ b/docs/qms/CYBERSECURITY_PLAN.md @@ -0,0 +1,4 @@ +# Cybersecurity Plan + +Use API key auth for pilot deployments, restrict CORS, isolate artifact paths, +avoid secret logging, and review dependencies before external deployment. diff --git a/docs/qms/DATA_GOVERNANCE.md b/docs/qms/DATA_GOVERNANCE.md new file mode 100644 index 0000000..2a5ae26 --- /dev/null +++ b/docs/qms/DATA_GOVERNANCE.md @@ -0,0 +1,4 @@ +# Data Governance + +MedSim artifacts are non-patient simulation data by default. Do not store PHI in +prompts, metadata, bench files, or artifacts. diff --git a/docs/qms/INTENDED_USE_AND_CLAIMS.md b/docs/qms/INTENDED_USE_AND_CLAIMS.md new file mode 100644 index 0000000..df2bcb8 --- /dev/null +++ b/docs/qms/INTENDED_USE_AND_CLAIMS.md @@ -0,0 +1,8 @@ +# Intended Use and Claims + +Allowed: production-grade infrastructure for non-patient simulation, synthetic +data, replay/evaluation, autonomy training in simulation/phantom contexts, and +QMS-readiness artifacts. + +Disallowed: FDA-cleared, clinically validated, patient-care ready, autonomous +surgery ready, or physically validated SOFA claims without evidence. 
diff --git a/docs/qms/PRODUCT_REQUIREMENTS.md b/docs/qms/PRODUCT_REQUIREMENTS.md new file mode 100644 index 0000000..b820e1b --- /dev/null +++ b/docs/qms/PRODUCT_REQUIREMENTS.md @@ -0,0 +1,5 @@ +# Product Requirements + +MedSim provides non-patient surgical robotics simulation infrastructure for +research, synthetic data, replay/evaluation, and supervised pilot assessment. +Clinical use requires external validation and regulatory clearance. diff --git a/docs/qms/REGULATORY_STRATEGY_NOTES.md b/docs/qms/REGULATORY_STRATEGY_NOTES.md new file mode 100644 index 0000000..2360dba --- /dev/null +++ b/docs/qms/REGULATORY_STRATEGY_NOTES.md @@ -0,0 +1,4 @@ +# Regulatory Strategy Notes + +Current positioning is non-clinical research and supervised non-patient pilot +evaluation. Regulated use requires formal regulatory review. diff --git a/docs/qms/RELEASE_CHECKLIST.md b/docs/qms/RELEASE_CHECKLIST.md new file mode 100644 index 0000000..97e9aab --- /dev/null +++ b/docs/qms/RELEASE_CHECKLIST.md @@ -0,0 +1,9 @@ +# Release Checklist + +- Tests pass. +- Lint passes. +- Frontend build passes. +- Demo pipeline runs. +- QMS verify passes. +- SOFA status disclosed. +- Clinical claims absent. diff --git a/docs/qms/REQUIREMENTS.yaml b/docs/qms/REQUIREMENTS.yaml new file mode 100644 index 0000000..3903f78 --- /dev/null +++ b/docs/qms/REQUIREMENTS.yaml @@ -0,0 +1,53 @@ +requirements: + - id: MEDSIM-BACKEND-001 + title: Honest backend capability reporting + description: Backends shall report health, fidelity, limitations, and missing dependencies. + rationale: Prevent unsupported physics or clinical claims. 
+ risk_links: [RISK-CLAIMS-001] + verification_method: automated_test + verification_artifacts: [tests/test_backend_registry.py] + implementation_files: [src/medsim/sim/capabilities.py, src/medsim/sim/backend_registry.py] + tests: [python3 -m pytest tests/test_backend_registry.py] + status: implemented + owner: engineering + release_blocking: true + - id: MEDSIM-DATA-001 + title: Audit-grade placeholder artifact bundle + description: Placeholder runs shall produce manifests, checksums, provenance, metrics, and trace streams. + rationale: Support reproducibility and dataset review. + risk_links: [RISK-DATA-001] + verification_method: automated_test + verification_artifacts: [tests/test_artifact_bundle_and_dataset.py] + implementation_files: [src/medsim/data/artifacts.py] + tests: [python3 -m pytest tests/test_artifact_bundle_and_dataset.py] + status: implemented + owner: engineering + release_blocking: true + - id: MEDSIM-SAFETY-001 + title: Non-patient autonomy boundary + description: Autonomy tools shall be simulation-only and carry no-patient-use metadata. + rationale: Prevent misuse as live autonomous surgery. + risk_links: [RISK-AUTO-001] + verification_method: automated_test + verification_artifacts: [tests/test_autonomy.py] + implementation_files: [src/medsim/autonomy] + tests: [python3 -m pytest tests/test_autonomy.py] + status: implemented + owner: engineering + release_blocking: true +risks: + - id: RISK-CLAIMS-001 + title: Unsupported clinical or physics claims + severity: critical + mitigation: UI/API/docs/backend metadata disclose fidelity and prohibited uses. + status: mitigated + - id: RISK-DATA-001 + title: Incomplete provenance + severity: major + mitigation: Run bundles include provenance, environment, manifests, and checksums. + status: mitigated + - id: RISK-AUTO-001 + title: Autonomy misuse + severity: critical + mitigation: Autonomy is simulation-only; robot integration is dry-run only. 
+ status: mitigated diff --git a/docs/qms/RISK_MANAGEMENT_PLAN.md b/docs/qms/RISK_MANAGEMENT_PLAN.md new file mode 100644 index 0000000..4174466 --- /dev/null +++ b/docs/qms/RISK_MANAGEMENT_PLAN.md @@ -0,0 +1,4 @@ +# Risk Management Plan + +Track claim, data, autonomy, security, and validation risks. Critical risks must +have mitigations before pilot use. diff --git a/docs/qms/RISK_REGISTER.md b/docs/qms/RISK_REGISTER.md new file mode 100644 index 0000000..81cf851 --- /dev/null +++ b/docs/qms/RISK_REGISTER.md @@ -0,0 +1,3 @@ +# Risk Register + +Primary machine-readable risks live in `REQUIREMENTS.yaml`. diff --git a/docs/qms/SBOM_NOTES.md b/docs/qms/SBOM_NOTES.md new file mode 100644 index 0000000..9d7d0e0 --- /dev/null +++ b/docs/qms/SBOM_NOTES.md @@ -0,0 +1,4 @@ +# SBOM Notes + +Current dependency inventory is available through `GET /dependencies`. A formal +SBOM should be generated before external pilot deployment. diff --git a/docs/qms/SOFTWARE_REQUIREMENTS_SPEC.md b/docs/qms/SOFTWARE_REQUIREMENTS_SPEC.md new file mode 100644 index 0000000..07a3cc3 --- /dev/null +++ b/docs/qms/SOFTWARE_REQUIREMENTS_SPEC.md @@ -0,0 +1,6 @@ +# Software Requirements Specification + +The software shall provide deterministic placeholder execution, backend +capability reporting, SOFA preflight, artifact bundles, replay validation, +dataset export, autonomy benchmarks, dry-run robotics previews, phantom +comparison, and validation bundle export. diff --git a/docs/qms/TRACEABILITY_MATRIX.md b/docs/qms/TRACEABILITY_MATRIX.md new file mode 100644 index 0000000..b6882e9 --- /dev/null +++ b/docs/qms/TRACEABILITY_MATRIX.md @@ -0,0 +1,5 @@ +# Traceability Matrix + +Traceability source: `docs/qms/REQUIREMENTS.yaml`. + +Run `python3 -m medsim.cli qms trace` to generate a current report. 
diff --git a/docs/qms/VERIFICATION_PLAN.md b/docs/qms/VERIFICATION_PLAN.md new file mode 100644 index 0000000..c0233bc --- /dev/null +++ b/docs/qms/VERIFICATION_PLAN.md @@ -0,0 +1,4 @@ +# Verification Plan + +Verification uses pytest, ruff, frontend build, CLI smoke commands, replay +validation, dataset validation, QMS verification, and demo pipeline execution. diff --git a/docs/qms/VERIFICATION_REPORT_TEMPLATE.md b/docs/qms/VERIFICATION_REPORT_TEMPLATE.md new file mode 100644 index 0000000..dd7e236 --- /dev/null +++ b/docs/qms/VERIFICATION_REPORT_TEMPLATE.md @@ -0,0 +1,8 @@ +# Verification Report Template + +- Version: +- Commit: +- Commands run: +- Results: +- Known gaps: +- Release decision: diff --git a/docs/workbench.md b/docs/workbench.md index 51fab38..5620b61 100644 --- a/docs/workbench.md +++ b/docs/workbench.md @@ -111,6 +111,85 @@ The viewer never invents position data; it always renders what the backend reports. In preview mode (no run created), it shows the post-reset state from `POST /api/scene/preview`. +## Asset-backed demo controls + +The viewer can optionally use local demo assets for visual context while keeping +the procedural backend-driven scene as the source of truth. These files live +under `frontend/public/demo-assets/`: + +- `hdri/surgery_4k.exr` — optional EXR environment lighting. It is not used + as the default background, so the backend-driven 3D task stays readable. +- `robot/ehosp_robotic_surgery_platform_-_basic.glb` — optional static hero + robot/platform prop. +- `props/surgical_medical_table_9mb.glb` — present for future use, but + intentionally disabled in the current viewer because the procedural stage + keeps the task area clearer. + +Assets are frontend-only and optional. Missing, renamed, slow, or broken assets +must not stop the viewer from rendering; the procedural room, lighting, tissue, +targets, tools, needle, and camera remain available as fallback content. 
+ +Local query parameters can tune the demo without backend changes: + +- `?hdri=0` or `?hdri=1` disables/enables the optional EXR environment. +- `?heroRobot=0` or `?heroRobot=1` disables/enables the optional static robot. +- `?table=0` or `?table=1` is recognized, but table GLB integration is deferred. +- `?cameraPreset=presentation_default` uses the balanced first-load demo view. +- `?cameraPreset=task_focus` prioritizes the backend-driven tissue/task area. +- `?cameraPreset=overview` shows more of the procedural room and stage. +- `?cameraPreset=room_hero` widens the view to include more static robot context. +- `?presentation=1` uses the cleaner default overlay for demos. +- `?presentation=0` shows the full operator rail with asset toggles. +- `?assetDebug=1` expands the in-view status overlay with asset paths, camera + preset, fallback details, and query hints. Debug mode wins over presentation + mode if both are requested. + +The robot asset is a static visual prop only. It is not animated, not controlled +by backend tool state, and not evidence of SOFA or robot-control capability. +The compact status overlay in the viewer states whether HDRI/robot assets are +ready, loading, disabled, failed, or using fallback. It also states that the +table asset is deferred, so demos do not silently misrepresent what loaded. + +## Run / operate the demo + +Install optional workbench dependencies once: + +```bash +python -m pip install -e ".[dev,workbench]" +``` + +Start the API from the repo root: + +```bash +python -m medsim.api --scene configs/base_scene.yaml --scenario-dir configs/scenarios +``` + +Start the frontend in a second terminal: + +```bash +cd frontend +npm install +npm run dev +``` + +Open `http://127.0.0.1:5173`. 
The viewer has a compact control rail for live +operation: + +- choose camera presets without editing the URL; +- click `Reset view` after orbiting/panning/zooming; +- switch compact, presentation, and debug modes; +- toggle HDRI and the static hero robot; +- copy the synchronized demo URL. + +The controls update query parameters without a page reload, so a shared URL +preserves the same camera preset, asset toggles, and presentation/debug mode. +Presentation mode keeps the scene clean but still says +`Backend-driven task view | static robot context`. Debug mode shows asset paths, +status reasons, and fallback details. `ready` means the optional asset loaded; +`disabled` means the operator turned it off; `failed` means loading failed and +fallback remains active; `deferred` means the table asset is intentionally not +integrated yet. + ## SOFA preview When a user selects `backend_preference="sofa"`, or the prompt mentions SOFA, @@ -126,11 +205,25 @@ no `step()`). ## Data visibility -The inspection panel makes the collected data explicit: +The workbench now surfaces the saved data in the left `Run & Data` panel as +soon as a run exists. The main training/evaluation artifact is the per-step +JSONL log: + +- `artifacts/runs/run_*/episodes/episode_0001.jsonl` — one JSON object per + step, including action, tool poses, needle pose, target metadata, camera + condition, events, failure/outcome fields, and timestamps. +- `artifacts/runs/run_*/episodes/episode_0001_summary.json` — outcome and + aggregate metrics for the episode. +- `artifacts/runs/run_*/run_manifest.json` and `config_snapshot.json` — + provenance and configuration snapshots for the run. + +The UI shows these absolute paths and includes a copy button for the primary +step log path. 
Across the left run panel and right inspection panel you can +inspect: - the compiled scenario (raw JSON); - the full event timeline (canonical event types, severity, messages); -- the latest step view (tools, needle, targets, camera, active perturbations); +- the selected step view (tools, needle, targets, camera, active perturbations); - the episode summary (outcome, targets passed, min camera quality, per-event-type counts, artifact paths); - absolute paths to `run_manifest.json`, `config_snapshot.json`, diff --git a/frontend/public/demo-assets/hdri/surgery_4k.exr b/frontend/public/demo-assets/hdri/surgery_4k.exr new file mode 100644 index 0000000..1423c36 Binary files /dev/null and b/frontend/public/demo-assets/hdri/surgery_4k.exr differ diff --git a/frontend/public/demo-assets/props/surgical_medical_table_9mb.glb b/frontend/public/demo-assets/props/surgical_medical_table_9mb.glb new file mode 100644 index 0000000..652ff45 Binary files /dev/null and b/frontend/public/demo-assets/props/surgical_medical_table_9mb.glb differ diff --git a/frontend/public/demo-assets/robot/ehosp_robotic_surgery_platform_-_basic.glb b/frontend/public/demo-assets/robot/ehosp_robotic_surgery_platform_-_basic.glb new file mode 100644 index 0000000..b93bd4e Binary files /dev/null and b/frontend/public/demo-assets/robot/ehosp_robotic_surgery_platform_-_basic.glb differ diff --git a/frontend/src/components/InspectPanel.tsx b/frontend/src/components/InspectPanel.tsx index 65421b3..6b6d3b2 100644 --- a/frontend/src/components/InspectPanel.tsx +++ b/frontend/src/components/InspectPanel.tsx @@ -245,6 +245,10 @@ function JsonTab({ step }: { step: StepView | null }) { return (

Raw Step View

+
+ This is the currently selected viewer payload. The full saved training/evaluation + trace is the JSONL step log shown in Run & Data. +
{JSON.stringify(step, null, 2)}
); diff --git a/frontend/src/components/PromptPanel.tsx b/frontend/src/components/PromptPanel.tsx index ca2d53b..f545012 100644 --- a/frontend/src/components/PromptPanel.tsx +++ b/frontend/src/components/PromptPanel.tsx @@ -68,6 +68,8 @@ export function PromptPanel({ + +
Example prompts @@ -112,6 +114,34 @@ export function PromptPanel({ ); } +function SafetyDisclosures({ + metadata, + preference, +}: { + metadata: BackendMetadata | null; + preference: "auto" | "placeholder" | "sofa"; +}) { + const sofaUnavailable = metadata ? !metadata.backends.sofa.available : false; + return ( +
+
+ Placeholder backend: deterministic infrastructure validation only, not biomechanical physics. +
+ {preference === "sofa" || sofaUnavailable ? ( +
+ SOFA backend unavailable: installing SOFA enables preflight and minimal scene builds only. SOFA stepping and physics execution are not implemented.
+ ) : null} +
+ Simulation/phantom autonomy training only. No live patient use. +
+
+ Supervised non-patient pilot mode only. Not for patient-care decisions. Not validated for autonomous surgery. +
+
+ ); +} + function CompiledSummary({ compiled }: { compiled: CompiledPromptResult }) { return (
diff --git a/frontend/src/components/RunPanel.tsx b/frontend/src/components/RunPanel.tsx index 676745e..e3c72bf 100644 --- a/frontend/src/components/RunPanel.tsx +++ b/frontend/src/components/RunPanel.tsx @@ -1,3 +1,4 @@ +import { useState } from "react"; import { CompiledPromptResult, SessionState, SofaPreview } from "../api"; interface Props { @@ -13,6 +14,7 @@ export function RunPanel({ session, compiled, artifacts, sofaPreview }: Props) {

Run & Data

{session ? : } {session?.summary ? : null} + {Object.keys(artifacts).length ? : null} {compiled?.sofa_preview_available && sofaPreview ? ( @@ -82,14 +84,93 @@ function SummaryBlock({ session }: { session: SessionState }) { ); } +function DataAccessBlock({ + artifacts, + session, +}: { + artifacts: Record; + session: SessionState | null; +}) { + const [copyState, setCopyState] = useState<"idle" | "copied" | "failed" | "unsupported">("idle"); + const stepLogPath = artifacts.step_log; + const summaryPath = artifacts.summary; + const runDir = artifacts.run_dir; + const primaryPath = stepLogPath ?? runDir ?? "artifacts/runs/run_*/episodes/episode_0001.jsonl"; + const hasRun = Boolean(session); + + const copyPath = async (path: string) => { + if (!navigator.clipboard?.writeText) { + setCopyState("unsupported"); + return; + } + try { + await navigator.clipboard.writeText(path); + setCopyState("copied"); + window.setTimeout(() => setCopyState("idle"), 1400); + } catch { + setCopyState("failed"); + } + }; + + return ( +
+
Structured data is saved locally
+

+ The viewer is only the playback surface. After each run, medsim writes the + trainable/evaluation trace as JSONL under artifacts/runs/. +

+
+
+
step-by-step trace
+
+ {hasRun ? primaryPath : "Run a scenario to create episode_0001.jsonl"} +
+
+ +
+
+ Each JSONL row contains action, tool poses, needle pose, targets, camera + condition, events, and outcome fields. The summary JSON is{" "} + {summaryPath ? ( + + {summaryPath} + + ) : ( + "created when a run completes." + )} +
+
+ ); +} + +function copyLabel(state: "idle" | "copied" | "failed" | "unsupported"): string { + switch (state) { + case "copied": + return "Copied"; + case "failed": + return "Copy failed"; + case "unsupported": + return "Copy unavailable"; + case "idle": + return "Copy path"; + } +} + function ArtifactBlock({ artifacts }: { artifacts: Record }) { return (
-

Artifacts

+

All artifact paths

{Object.entries(artifacts).map(([k, v]) => (
-
{k}
+
{artifactLabel(k)}
{v}
@@ -100,6 +181,25 @@ function ArtifactBlock({ artifacts }: { artifacts: Record }) { ); } +function artifactLabel(key: string): string { + switch (key) { + case "run_dir": + return "run folder"; + case "episode_dir": + return "episode folder"; + case "run_manifest": + return "run manifest"; + case "config_snapshot": + return "config snapshot"; + case "step_log": + return "JSONL step log"; + case "summary": + return "episode summary"; + default: + return key; + } +} + function SofaPreviewBlock({ sofaPreview }: { sofaPreview: SofaPreview }) { return (
diff --git a/frontend/src/components/Viewer.tsx b/frontend/src/components/Viewer.tsx index f762e83..32db323 100644 --- a/frontend/src/components/Viewer.tsx +++ b/frontend/src/components/Viewer.tsx @@ -1,8 +1,24 @@ import { Canvas } from "@react-three/fiber"; -import { Grid, OrbitControls, Html } from "@react-three/drei"; -import { useMemo } from "react"; +import { OrbitControls } from "@react-three/drei"; +import { useCallback, useMemo, useRef } from "react"; import * as THREE from "three"; +import type { OrbitControls as OrbitControlsImpl } from "three-stdlib"; import { SceneView, StepView } from "../api"; +import { AssetStatusOverlay } from "./viewer/AssetStatusOverlay"; +import { CameraPresetRig } from "./viewer/CameraPresetRig"; +import { CameraGimbal } from "./viewer/CameraGimbal"; +import { DemoControlRail } from "./viewer/DemoControlRail"; +import { HeroRobot } from "./viewer/HeroRobot"; +import { Needle } from "./viewer/Needle"; +import { OptionalHdriEnvironment } from "./viewer/OptionalHdriEnvironment"; +import { SceneEnvironment } from "./viewer/SceneEnvironment"; +import { SceneLighting } from "./viewer/SceneLighting"; +import { SurgicalStage } from "./viewer/SurgicalStage"; +import { TargetZone } from "./viewer/TargetZone"; +import { TissuePatch } from "./viewer/TissuePatch"; +import { Tool } from "./viewer/Tool"; +import { DEMO_ASSET_SCENE_CONFIG } from "./viewer/assetSceneConfig"; +import { useViewerAssetControls } from "./viewer/useViewerAssetControls"; interface Props { scene: SceneView; @@ -16,27 +32,101 @@ interface Props { export function Viewer({ scene, step }: Props) { const tissue = scene.tissue_patch; const backgroundColor = useMemo(() => new THREE.Color("#0a0f17"), []); + const controlsRef = useRef(null); + const { + controls, + statuses, + sceneMode, + cameraPreset, + cameraPresets, + cameraResetToken, + displayMode, + copyStatus, + reportAssetStatus, + setCameraPreset, + setAssetEnabled, + setDisplayMode, + resetView, + copyShareUrl, + } = 
useViewerAssetControls(); + const reportHdriStatus = useCallback( + (update: Parameters[1]) => reportAssetStatus("hdri", update), + [reportAssetStatus] + ); + const reportHeroRobotStatus = useCallback( + (update: Parameters[1]) => reportAssetStatus("heroRobot", update), + [reportAssetStatus] + ); return ( { gl.setClearColor(backgroundColor); + gl.shadowMap.enabled = true; + gl.shadowMap.type = THREE.PCFSoftShadowMap; + gl.outputColorSpace = THREE.SRGBColorSpace; + gl.toneMapping = THREE.ACESFilmicToneMapping; + gl.toneMappingExposure = 1.05; threeScene.background = backgroundColor; + threeScene.fog = new THREE.Fog("#0a0f17", 130, 260); }} > - - - - - + + + + - + + - + + + + {scene.targets.map((t) => { const currentTarget = step?.targets.find((s) => s.id === t.id); @@ -78,191 +168,3 @@ export function Viewer({ scene, step }: Props) { ); } - -function WorkbenchGround() { - return ( - <> - - - ); -} - -function TissuePatch({ - sizeMm, - centerMm, -}: { - sizeMm: [number, number, number]; - centerMm: [number, number, number]; -}) { - return ( - - - - - ); -} - -function TargetZone({ - id, - center, - radiusMm, - passed, -}: { - id: string; - center: [number, number, number]; - radiusMm: number; - passed: boolean; -}) { - const color = passed ? "#22c55e" : "#5eead4"; - return ( - - - - - - - - - - -
- {id} - {passed ? " ✓" : ""} -
- -
- ); -} - -function Tool({ - id, - kind, - positionMm, - grasping, - collisionCount, -}: { - id: string; - kind: string; - positionMm: [number, number, number]; - grasping: boolean; - collisionCount: number; -}) { - const shaftColor = kind === "needle_driver" ? "#94a3b8" : "#cbd5e1"; - const tipColor = grasping ? "#5eead4" : collisionCount > 0 ? "#f87171" : "#e2e8f0"; - - // Draw a simple "shaft + tip" geometry descending toward the tissue from - // the stored position. The shaft rises +Z away from the workspace. - return ( - - - - - - - - - - -
- {id} - {grasping ? " · grasp" : ""} - {collisionCount > 0 ? ` · ⚠${collisionCount}` : ""} -
- -
- ); -} - -function Needle({ - positionMm, - heldBy, -}: { - positionMm: [number, number, number]; - heldBy: string | null; -}) { - const color = heldBy ? "#fbbf24" : "#e5e7eb"; - return ( - - - - - - - - - - - ); -} - -function CameraGimbal({ - positionMm, - occluded, -}: { - positionMm: [number, number, number]; - occluded: boolean; -}) { - const color = occluded ? "#f59e0b" : "#5eead4"; - return ( - - - - - - - - - - -
- camera{occluded ? " · occluded" : ""} -
- -
- ); -} diff --git a/frontend/src/components/viewer/AssetStatusOverlay.tsx b/frontend/src/components/viewer/AssetStatusOverlay.tsx new file mode 100644 index 0000000..275f8ea --- /dev/null +++ b/frontend/src/components/viewer/AssetStatusOverlay.tsx @@ -0,0 +1,168 @@ +import { Html } from "@react-three/drei"; +import { CSSProperties } from "react"; +import { + AssetLoadStatus, + ViewerAssetStatuses, + ViewerCameraPreset, + ViewerDisplayMode, +} from "./assetSceneConfig"; + +interface Props { + cameraPreset: ViewerCameraPreset; + displayMode: ViewerDisplayMode; + sceneMode: string; + statuses: ViewerAssetStatuses; +} + +export function AssetStatusOverlay({ cameraPreset, displayMode, sceneMode, statuses }: Props) { + const debugVisible = displayMode === "debug"; + const presentationVisible = displayMode === "presentation"; + const showBadges = debugVisible; + const orderedStatuses = [statuses.hdri, statuses.heroRobot, statuses.table]; + + return ( + +
+
+
+ Scene: {sceneMode} + | + Camera: {cameraPreset.label} + | + Mode: {displayMode} +
+ {presentationVisible ? ( +
+ Backend-driven task view | static robot context +
+ ) : showBadges ? ( +
+ {orderedStatuses.map((asset) => ( +
+ {asset.label} + {asset.status} +
+ ))} +
+ ) : null} + {debugVisible ? ( +
+ {orderedStatuses.map((asset) => ( +
+ {asset.label}: {asset.message} + {asset.error ? ` (${asset.error})` : ""} + {asset.url ? ` | ${asset.url}` : ""} +
+ ))} +
+ Controls:{" "} + + ?hdri=0|1&heroRobot=0|1&table=0|1&cameraPreset= + presentation_default|task_focus|overview|room_hero&presentation=0|1&assetDebug=1 + +
+
+ ) : null} +
+
+ + ); +} + +function statusStyle(status: AssetLoadStatus): CSSProperties { + switch (status) { + case "ready": + return { borderColor: "rgba(94, 234, 212, 0.55)", color: "#ccfbf1" }; + case "loading": + return { borderColor: "rgba(147, 197, 253, 0.55)", color: "#dbeafe" }; + case "failed": + return { borderColor: "rgba(248, 113, 113, 0.62)", color: "#fee2e2" }; + case "fallback": + return { borderColor: "rgba(251, 191, 36, 0.62)", color: "#fef3c7" }; + case "deferred": + return { borderColor: "rgba(168, 85, 247, 0.55)", color: "#f3e8ff" }; + case "disabled": + return { borderColor: "rgba(148, 163, 184, 0.4)", color: "#cbd5e1" }; + } +} + +const containerStyle: CSSProperties = { + position: "absolute", + bottom: 12, + left: 12, + pointerEvents: "none", + userSelect: "none", +}; + +const panelStyle: CSSProperties = { + maxWidth: 380, + padding: "7px 9px", + border: "1px solid rgba(148, 163, 184, 0.22)", + borderRadius: 8, + background: "rgba(8, 13, 21, 0.7)", + boxShadow: "0 18px 40px rgba(0, 0, 0, 0.32)", + backdropFilter: "blur(10px)", + color: "#e5e7eb", + fontFamily: + "Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif", + fontSize: 11, + lineHeight: 1.35, +}; + +const presentationPanelStyle: CSSProperties = { + ...panelStyle, + maxWidth: 430, + padding: "7px 9px", + background: "rgba(8, 13, 21, 0.56)", +}; + +const sceneModeStyle: CSSProperties = { + color: "#f8fafc", + fontWeight: 700, + letterSpacing: 0, +}; + +const separatorStyle: CSSProperties = { + color: "#64748b", + margin: "0 7px", +}; + +const presentationNoteStyle: CSSProperties = { + marginTop: 5, + color: "#cbd5e1", +}; + +const badgeRowStyle: CSSProperties = { + marginTop: 8, + display: "flex", + flexWrap: "wrap", + gap: 6, +}; + +const badgeStyle: CSSProperties = { + display: "inline-flex", + alignItems: "center", + gap: 6, + padding: "4px 7px", + border: "1px solid", + borderRadius: 6, + background: "rgba(15, 23, 42, 0.74)", + whiteSpace: "nowrap", +}; + 
+const labelStyle: CSSProperties = { + color: "#f8fafc", + fontWeight: 650, +}; + +const debugStyle: CSSProperties = { + marginTop: 9, + paddingTop: 8, + borderTop: "1px solid rgba(148, 163, 184, 0.18)", + color: "#cbd5e1", +}; + +const debugLineStyle: CSSProperties = { + marginTop: 4, + overflowWrap: "anywhere", +}; diff --git a/frontend/src/components/viewer/CameraGimbal.tsx b/frontend/src/components/viewer/CameraGimbal.tsx new file mode 100644 index 0000000..d10874e --- /dev/null +++ b/frontend/src/components/viewer/CameraGimbal.tsx @@ -0,0 +1,49 @@ +import { Html } from "@react-three/drei"; + +export function CameraGimbal({ + positionMm, + occluded, +}: { + positionMm: [number, number, number]; + occluded: boolean; +}) { + const color = occluded ? "#f59e0b" : "#5eead4"; + return ( + + + + + + + + + + + + + + +
+ camera{occluded ? " · occluded" : ""} +
+ +
+ ); +} diff --git a/frontend/src/components/viewer/CameraPresetRig.tsx b/frontend/src/components/viewer/CameraPresetRig.tsx new file mode 100644 index 0000000..06e8663 --- /dev/null +++ b/frontend/src/components/viewer/CameraPresetRig.tsx @@ -0,0 +1,35 @@ +import { useThree } from "@react-three/fiber"; +import { MutableRefObject, useEffect } from "react"; +import * as THREE from "three"; +import type { OrbitControls as OrbitControlsImpl } from "three-stdlib"; +import { ViewerCameraPreset } from "./assetSceneConfig"; + +interface Props { + preset: ViewerCameraPreset; + controlsRef: MutableRefObject; + resetToken: number; +} + +export function CameraPresetRig({ preset, controlsRef, resetToken }: Props) { + const { camera, invalidate } = useThree(); + + useEffect(() => { + const target = new THREE.Vector3(...preset.target); + camera.position.set(...preset.position); + camera.up.set(0, 0, 1); + camera.lookAt(target); + + if (camera instanceof THREE.PerspectiveCamera) { + camera.fov = preset.fov; + camera.near = 0.1; + camera.far = 1000; + camera.updateProjectionMatrix(); + } + + controlsRef.current?.target.set(...preset.target); + controlsRef.current?.update(); + invalidate(); + }, [camera, controlsRef, invalidate, preset, resetToken]); + + return null; +} diff --git a/frontend/src/components/viewer/DemoControlRail.tsx b/frontend/src/components/viewer/DemoControlRail.tsx new file mode 100644 index 0000000..9b7c8e7 --- /dev/null +++ b/frontend/src/components/viewer/DemoControlRail.tsx @@ -0,0 +1,270 @@ +import { Html } from "@react-three/drei"; +import { CSSProperties, ChangeEvent } from "react"; +import { + AssetKey, + CameraPresetKey, + ViewerAssetControls, + ViewerAssetStatuses, + ViewerCameraPreset, + ViewerDisplayMode, +} from "./assetSceneConfig"; + +interface Props { + cameraPresets: ViewerCameraPreset[]; + controls: ViewerAssetControls; + copyShareUrl: () => void; + copyStatus: "idle" | "copied" | "failed" | "unsupported"; + displayMode: ViewerDisplayMode; + 
resetView: () => void; + setAssetEnabled: ( + key: Extract, + enabled: boolean + ) => void; + setCameraPreset: (key: CameraPresetKey) => void; + setDisplayMode: (mode: ViewerDisplayMode) => void; + statuses: ViewerAssetStatuses; +} + +export function DemoControlRail({ + cameraPresets, + controls, + copyShareUrl, + copyStatus, + displayMode, + resetView, + setAssetEnabled, + setCameraPreset, + setDisplayMode, + statuses, +}: Props) { + const onCameraChange = (event: ChangeEvent) => { + setCameraPreset(event.target.value as CameraPresetKey); + }; + const presentationRail = displayMode === "presentation"; + + return ( + +
+
+
+ + +
+ +
+ setDisplayMode("compact")} + /> + setDisplayMode("presentation")} + /> + setDisplayMode("debug")} + /> +
+ + {presentationRail ? null : ( + <> +
+ setAssetEnabled("hdri", !controls.hdri)} + /> + setAssetEnabled("heroRobot", !controls.heroRobot)} + /> + +
+ +
+ Backend-driven task view | static robot context + +
+ + )} +
+
+ + ); +} + +function ToggleButton({ + active, + label, + onClick, +}: { + active: boolean; + label: string; + onClick: () => void; +}) { + return ( + + ); +} + +function statusSuffix(status: string): string { + if (status === "ready" || status === "failed" || status === "loading") { + return status; + } + if (status === "disabled") { + return "off"; + } + return ""; +} + +function copyStatusLabel(status: Props["copyStatus"]): string { + switch (status) { + case "copied": + return "URL copied"; + case "failed": + return "Copy failed"; + case "unsupported": + return "Copy unavailable"; + case "idle": + return "Copy URL"; + } +} + +const containerStyle: CSSProperties = { + position: "absolute", + top: 12, + right: 12, + pointerEvents: "none", + userSelect: "none", +}; + +const railStyle: CSSProperties = { + pointerEvents: "auto", + display: "flex", + flexDirection: "column", + gap: 7, + maxWidth: 390, + padding: "10px 11px", + border: "1px solid rgba(148, 163, 184, 0.22)", + borderRadius: 8, + background: "rgba(8, 13, 21, 0.68)", + boxShadow: "0 18px 40px rgba(0, 0, 0, 0.32)", + backdropFilter: "blur(10px)", + color: "#e5e7eb", + fontFamily: + "Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, Segoe UI, sans-serif", + fontSize: 11, + lineHeight: 1.35, +}; + +const presentationRailStyle: CSSProperties = { + ...railStyle, + maxWidth: 330, + padding: "8px 9px", + background: "rgba(8, 13, 21, 0.54)", +}; + +const rowStyle: CSSProperties = { + display: "flex", + alignItems: "center", + flexWrap: "wrap", + gap: 6, +}; + +const labelStyle: CSSProperties = { + display: "flex", + alignItems: "center", + gap: 6, + color: "#cbd5e1", + fontWeight: 650, +}; + +const selectStyle: CSSProperties = { + minWidth: 158, + padding: "4px 7px", + border: "1px solid rgba(148, 163, 184, 0.36)", + borderRadius: 6, + background: "rgba(15, 23, 42, 0.84)", + color: "#f8fafc", + font: "inherit", +}; + +const buttonStyle: CSSProperties = { + padding: "4px 7px", + border: "1px 
solid rgba(148, 163, 184, 0.32)", + borderRadius: 6, + background: "rgba(15, 23, 42, 0.72)", + color: "#dbeafe", + font: "inherit", + cursor: "pointer", +}; + +const activeButtonStyle: CSSProperties = { + ...buttonStyle, + borderColor: "rgba(94, 234, 212, 0.58)", + background: "rgba(20, 83, 91, 0.62)", + color: "#ccfbf1", +}; + +const disabledButtonStyle: CSSProperties = { + ...buttonStyle, + borderColor: "rgba(168, 85, 247, 0.42)", + color: "#f3e8ff", + cursor: "not-allowed", + opacity: 0.76, +}; + +const footerStyle: CSSProperties = { + display: "flex", + alignItems: "center", + justifyContent: "space-between", + gap: 8, + paddingTop: 4, + borderTop: "1px solid rgba(148, 163, 184, 0.16)", +}; + +const truthStyle: CSSProperties = { + color: "#cbd5e1", + overflowWrap: "anywhere", +}; + +const copyButtonStyle: CSSProperties = { + ...buttonStyle, + whiteSpace: "nowrap", +}; diff --git a/frontend/src/components/viewer/HeroRobot.tsx b/frontend/src/components/viewer/HeroRobot.tsx new file mode 100644 index 0000000..fbbbdfe --- /dev/null +++ b/frontend/src/components/viewer/HeroRobot.tsx @@ -0,0 +1,158 @@ +import { useEffect, useState } from "react"; +import * as THREE from "three"; +import { GLTFLoader } from "three/examples/jsm/loaders/GLTFLoader.js"; +import { + AssetStatusUpdate, + DEMO_ASSET_SCENE_CONFIG, + HeroRobotAssetConfig, + errorMessage, +} from "./assetSceneConfig"; + +interface LoadedRobot { + scene: THREE.Group; + offset: THREE.Vector3; + scale: number; +} + +interface Props { + config?: HeroRobotAssetConfig; + enabled?: boolean; + onStatusChange?: (update: AssetStatusUpdate) => void; +} + +export function HeroRobot({ + config = DEMO_ASSET_SCENE_CONFIG.heroRobot, + enabled = config.enabledByDefault, + onStatusChange, +}: Props) { + const [robot, setRobot] = useState(null); + + useEffect(() => { + if (!enabled) { + setRobot(null); + onStatusChange?.({ + status: "disabled", + message: "Disabled by viewer controls; procedural scene remains active.", + }); + 
return; + } + + let cancelled = false; + const loader = new GLTFLoader(); + + setRobot(null); + onStatusChange?.({ + status: "loading", + message: "Loading optional static GLB hero prop.", + }); + + loader.load( + config.url, + (gltf) => { + if (cancelled) { + disposeObject(gltf.scene); + return; + } + try { + setRobot(prepareRobot(gltf.scene, config)); + onStatusChange?.({ + status: "ready", + message: "Static hero robot prop loaded; it is not driven by backend state.", + }); + } catch (error) { + disposeObject(gltf.scene); + const message = errorMessage(error); + console.warn("Optional hero robot failed to prepare; using procedural scene only.", error); + setRobot(null); + onStatusChange?.({ + status: "failed", + message: "GLB loaded but could not be prepared; procedural scene remains active.", + error: message, + }); + } + }, + undefined, + (error) => { + if (cancelled) { + return; + } + const message = errorMessage(error); + console.warn("Optional hero robot failed to load; using procedural scene only.", error); + setRobot(null); + onStatusChange?.({ + status: "failed", + message: "GLB load failed; procedural scene remains active.", + error: message, + }); + } + ); + + return () => { + cancelled = true; + }; + }, [config, enabled, onStatusChange]); + + useEffect(() => { + return () => { + if (robot) { + disposeObject(robot.scene); + } + }; + }, [robot]); + + if (!robot) { + return null; + } + + return ( + + + + ); +} + +function disposeObject(root: THREE.Object3D) { + root.traverse((object) => { + const mesh = object as THREE.Mesh; + if (!mesh.isMesh) { + return; + } + mesh.geometry?.dispose(); + const materials = Array.isArray(mesh.material) ? 
mesh.material : [mesh.material]; + for (const material of materials) { + material?.dispose(); + } + }); +} + +function prepareRobot(scene: THREE.Group, config: HeroRobotAssetConfig): LoadedRobot { + scene.traverse((object) => { + const mesh = object as THREE.Mesh; + if (mesh.isMesh) { + mesh.castShadow = config.shadows.cast; + mesh.receiveShadow = config.shadows.receive; + } + }); + + const box = new THREE.Box3().setFromObject(scene); + const size = new THREE.Vector3(); + const center = new THREE.Vector3(); + box.getSize(size); + box.getCenter(center); + const maxDimension = Math.max(size.x, size.y, size.z) || 1; + const offset = new THREE.Vector3( + config.transform.centerOnBoundsXY ? -center.x : 0, + config.transform.centerOnBoundsXY ? -center.y : 0, + config.transform.anchorBottomToFloor ? -box.min.z : -center.z + ); + + return { + scene, + offset, + scale: config.transform.targetMaxDimensionMm / maxDimension, + }; +} diff --git a/frontend/src/components/viewer/Needle.tsx b/frontend/src/components/viewer/Needle.tsx new file mode 100644 index 0000000..a329d73 --- /dev/null +++ b/frontend/src/components/viewer/Needle.tsx @@ -0,0 +1,33 @@ +export function Needle({ + positionMm, + heldBy, +}: { + positionMm: [number, number, number]; + heldBy: string | null; +}) { + const color = heldBy ? 
"#fbbf24" : "#e5e7eb"; + return ( + + + + + + + + + + + ); +} diff --git a/frontend/src/components/viewer/OptionalHdriEnvironment.tsx b/frontend/src/components/viewer/OptionalHdriEnvironment.tsx new file mode 100644 index 0000000..848f1fe --- /dev/null +++ b/frontend/src/components/viewer/OptionalHdriEnvironment.tsx @@ -0,0 +1,93 @@ +import { useThree } from "@react-three/fiber"; +import { useEffect } from "react"; +import * as THREE from "three"; +import { EXRLoader } from "three/examples/jsm/loaders/EXRLoader.js"; +import { + AssetStatusUpdate, + DEMO_ASSET_SCENE_CONFIG, + HdriAssetConfig, + errorMessage, +} from "./assetSceneConfig"; + +interface Props { + config?: HdriAssetConfig; + enabled?: boolean; + onStatusChange?: (update: AssetStatusUpdate) => void; +} + +export function OptionalHdriEnvironment({ + config = DEMO_ASSET_SCENE_CONFIG.hdri, + enabled = config.enabledByDefault, + onStatusChange, +}: Props) { + const { scene } = useThree(); + + useEffect(() => { + if (!enabled) { + onStatusChange?.({ + status: "disabled", + message: "Disabled by viewer controls; procedural lighting remains active.", + }); + return; + } + + let cancelled = false; + let loadedTexture: THREE.Texture | null = null; + const previousEnvironment = scene.environment; + const previousBackground = scene.background; + const loader = new EXRLoader(); + + onStatusChange?.({ + status: "loading", + message: "Loading optional EXR environment.", + }); + + loader.load( + config.url, + (texture) => { + if (cancelled) { + texture.dispose(); + return; + } + texture.mapping = THREE.EquirectangularReflectionMapping; + loadedTexture = texture; + scene.environment = texture; + if (config.applyAsBackground) { + scene.background = texture; + } + onStatusChange?.({ + status: "ready", + message: config.applyAsBackground + ? "EXR loaded for environment lighting and background." 
+ : "EXR loaded for environment lighting; procedural background remains active.", + }); + }, + undefined, + (error) => { + if (cancelled) { + return; + } + const message = errorMessage(error); + console.warn("Optional HDRI environment failed to load; using procedural lighting.", error); + onStatusChange?.({ + status: "failed", + message: "EXR load failed; procedural lighting and background remain active.", + error: message, + }); + } + ); + + return () => { + cancelled = true; + if (scene.environment === loadedTexture) { + scene.environment = previousEnvironment; + } + if (scene.background === loadedTexture) { + scene.background = previousBackground; + } + loadedTexture?.dispose(); + }; + }, [config.applyAsBackground, config.url, enabled, onStatusChange, scene]); + + return null; +} diff --git a/frontend/src/components/viewer/SceneEnvironment.tsx b/frontend/src/components/viewer/SceneEnvironment.tsx new file mode 100644 index 0000000..a1e5adc --- /dev/null +++ b/frontend/src/components/viewer/SceneEnvironment.tsx @@ -0,0 +1,65 @@ +import { WorkbenchGround } from "./WorkbenchGround"; + +export function SceneEnvironment() { + return ( + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + ); +} diff --git a/frontend/src/components/viewer/SceneLighting.tsx b/frontend/src/components/viewer/SceneLighting.tsx new file mode 100644 index 0000000..23f7e80 --- /dev/null +++ b/frontend/src/components/viewer/SceneLighting.tsx @@ -0,0 +1,24 @@ +export function SceneLighting() { + return ( + <> + + + + + + + + + ); +} diff --git a/frontend/src/components/viewer/SurgicalStage.tsx b/frontend/src/components/viewer/SurgicalStage.tsx new file mode 100644 index 0000000..065c0ea --- /dev/null +++ b/frontend/src/components/viewer/SurgicalStage.tsx @@ -0,0 +1,41 @@ +export function SurgicalStage({ + tissueSizeMm, + tissueCenterMm, +}: { + tissueSizeMm: [number, number, number]; + tissueCenterMm: [number, number, number]; 
+}) { + const stageWidth = tissueSizeMm[0] + 24; + const stageDepth = tissueSizeMm[1] + 20; + const stageZ = tissueCenterMm[2] - 1.35; + const railZ = tissueCenterMm[2] + 0.55; + + return ( + + + + + + + + + + + + + + + + + + + + + + + + + + + ); +} diff --git a/frontend/src/components/viewer/TargetZone.tsx b/frontend/src/components/viewer/TargetZone.tsx new file mode 100644 index 0000000..e53b410 --- /dev/null +++ b/frontend/src/components/viewer/TargetZone.tsx @@ -0,0 +1,56 @@ +import { Html } from "@react-three/drei"; + +export function TargetZone({ + id, + center, + radiusMm, + passed, +}: { + id: string; + center: [number, number, number]; + radiusMm: number; + passed: boolean; +}) { + const color = passed ? "#22c55e" : "#5eead4"; + return ( + + + + + + + + + + + + + + +
+ {id} + {passed ? " ✓" : ""} +
+ +
+ ); +} diff --git a/frontend/src/components/viewer/TissuePatch.tsx b/frontend/src/components/viewer/TissuePatch.tsx new file mode 100644 index 0000000..da89853 --- /dev/null +++ b/frontend/src/components/viewer/TissuePatch.tsx @@ -0,0 +1,26 @@ +export function TissuePatch({ + sizeMm, + centerMm, +}: { + sizeMm: [number, number, number]; + centerMm: [number, number, number]; +}) { + return ( + + + + + + + + + + + ); +} diff --git a/frontend/src/components/viewer/Tool.tsx b/frontend/src/components/viewer/Tool.tsx new file mode 100644 index 0000000..00f7fbc --- /dev/null +++ b/frontend/src/components/viewer/Tool.tsx @@ -0,0 +1,61 @@ +import { Html } from "@react-three/drei"; + +export function Tool({ + id, + kind, + positionMm, + grasping, + collisionCount, +}: { + id: string; + kind: string; + positionMm: [number, number, number]; + grasping: boolean; + collisionCount: number; +}) { + const shaftColor = kind === "needle_driver" ? "#94a3b8" : "#cbd5e1"; + const tipColor = grasping ? "#5eead4" : collisionCount > 0 ? "#f87171" : "#e2e8f0"; + + // Draw a simple "shaft + tip" geometry descending toward the tissue from + // the stored position. The shaft rises +Z away from the workspace. + return ( + + + + + + + + + + + + 0 ? 0.18 : 0.04} + metalness={0.78} + roughness={0.2} + /> + + +
+ {id} + {grasping ? " · grasp" : ""} + {collisionCount > 0 ? ` · ⚠${collisionCount}` : ""} +
+ +
+ ); +} diff --git a/frontend/src/components/viewer/WorkbenchGround.tsx b/frontend/src/components/viewer/WorkbenchGround.tsx new file mode 100644 index 0000000..a059937 --- /dev/null +++ b/frontend/src/components/viewer/WorkbenchGround.tsx @@ -0,0 +1,21 @@ +import { Grid } from "@react-three/drei"; + +export function WorkbenchGround() { + return ( + <> + + + ); +} diff --git a/frontend/src/components/viewer/assetSceneConfig.ts b/frontend/src/components/viewer/assetSceneConfig.ts new file mode 100644 index 0000000..9d694fd --- /dev/null +++ b/frontend/src/components/viewer/assetSceneConfig.ts @@ -0,0 +1,278 @@ +export type Vector3Tuple = [number, number, number]; + +export type AssetLoadStatus = + | "disabled" + | "loading" + | "ready" + | "failed" + | "fallback" + | "deferred"; + +export type AssetKey = "hdri" | "heroRobot" | "table"; + +export type CameraPresetKey = "presentation_default" | "task_focus" | "overview" | "room_hero"; + +export type ViewerDisplayMode = "compact" | "presentation" | "debug"; + +export interface ViewerAssetStatus { + key: AssetKey; + label: string; + status: AssetLoadStatus; + url?: string; + message?: string; + error?: string; +} + +export type ViewerAssetStatuses = Record; + +export interface ViewerAssetControls { + hdri: boolean; + heroRobot: boolean; + table: boolean; + assetDebug: boolean; + presentation: boolean; + cameraPreset: CameraPresetKey; +} + +export interface HdriAssetConfig { + key: "hdri"; + label: string; + url: string; + enabledByDefault: boolean; + applyAsBackground: boolean; +} + +export interface HeroRobotAssetConfig { + key: "heroRobot"; + label: string; + url: string; + enabledByDefault: boolean; + transform: { + position: Vector3Tuple; + rotation: Vector3Tuple; + scale: number; + targetMaxDimensionMm: number; + floorZ: number; + centerOnBoundsXY: boolean; + anchorBottomToFloor: boolean; + }; + shadows: { + cast: boolean; + receive: boolean; + }; +} + +export interface TableAssetConfig { + key: "table"; + label: 
string; + url: string; + enabledByDefault: boolean; + note: string; +} + +export interface ViewerCameraPreset { + key: CameraPresetKey; + label: string; + position: Vector3Tuple; + target: Vector3Tuple; + fov: number; +} + +export interface DemoAssetSceneConfig { + hdri: HdriAssetConfig; + heroRobot: HeroRobotAssetConfig; + table: TableAssetConfig; + debugVisibleByDefault: boolean; + presentationVisibleByDefault: boolean; + defaultCameraPreset: CameraPresetKey; + cameraPresets: Record; +} + +export interface AssetStatusUpdate { + status: AssetLoadStatus; + message?: string; + error?: string; +} + +export const DEMO_ASSET_SCENE_CONFIG: DemoAssetSceneConfig = { + hdri: { + key: "hdri", + label: "HDRI", + url: "/demo-assets/hdri/surgery_4k.exr", + enabledByDefault: true, + applyAsBackground: false, + }, + heroRobot: { + key: "heroRobot", + label: "Hero robot", + url: "/demo-assets/robot/ehosp_robotic_surgery_platform_-_basic.glb", + enabledByDefault: true, + transform: { + position: [68, 42, -0.42], + rotation: [Math.PI / 2, 0, -0.42], + scale: 1, + targetMaxDimensionMm: 78, + floorZ: -0.42, + centerOnBoundsXY: true, + anchorBottomToFloor: true, + }, + shadows: { + cast: true, + receive: true, + }, + }, + table: { + key: "table", + label: "Table asset", + url: "/demo-assets/props/surgical_medical_table_9mb.glb", + enabledByDefault: false, + note: "Present for future use; intentionally disabled so it does not obscure the state-driven task view.", + }, + debugVisibleByDefault: false, + presentationVisibleByDefault: true, + defaultCameraPreset: "presentation_default", + cameraPresets: { + presentation_default: { + key: "presentation_default", + label: "Presentation default", + position: [92, -112, 72], + target: [10, 0, 16], + fov: 42, + }, + task_focus: { + key: "task_focus", + label: "Task focus", + position: [48, -62, 42], + target: [0, -2, 8], + fov: 34, + }, + overview: { + key: "overview", + label: "Overview", + position: [128, -142, 100], + target: [12, 6, 20], + 
fov: 48, + }, + room_hero: { + key: "room_hero", + label: "Room hero", + position: [148, -92, 86], + target: [38, 16, 22], + fov: 46, + }, + }, +}; + +export function parseViewerAssetControls( + search: string, + config: DemoAssetSceneConfig = DEMO_ASSET_SCENE_CONFIG +): ViewerAssetControls { + const params = new URLSearchParams(search.startsWith("?") ? search.slice(1) : search); + return { + hdri: parseBooleanParam(params.get("hdri"), config.hdri.enabledByDefault), + heroRobot: parseBooleanParam(params.get("heroRobot"), config.heroRobot.enabledByDefault), + table: parseBooleanParam(params.get("table"), config.table.enabledByDefault), + assetDebug: parseBooleanParam(params.get("assetDebug"), config.debugVisibleByDefault), + presentation: parseBooleanParam( + params.get("presentation"), + config.presentationVisibleByDefault + ), + cameraPreset: parseCameraPresetParam(params.get("cameraPreset"), config.defaultCameraPreset), + }; +} + +export function initialAssetStatuses( + controls: ViewerAssetControls, + config: DemoAssetSceneConfig = DEMO_ASSET_SCENE_CONFIG +): ViewerAssetStatuses { + return { + hdri: { + key: "hdri", + label: config.hdri.label, + status: controls.hdri ? "loading" : "disabled", + url: config.hdri.url, + message: controls.hdri ? "Attempting optional EXR environment load." : "Disabled by viewer controls.", + }, + heroRobot: { + key: "heroRobot", + label: config.heroRobot.label, + status: controls.heroRobot ? "loading" : "disabled", + url: config.heroRobot.url, + message: controls.heroRobot + ? "Attempting optional static GLB hero prop load." + : "Disabled by viewer controls.", + }, + table: { + key: "table", + label: config.table.label, + status: "deferred", + url: config.table.url, + message: controls.table ? "Table query recognized; GLB integration is deferred." : config.table.note, + }, + }; +} + +export function describeSceneMode(statuses: ViewerAssetStatuses): string { + const activeAssets = [ + statuses.hdri.status === "ready" ? 
"hdri" : null, + statuses.heroRobot.status === "ready" ? "hero robot" : null, + ].filter((item): item is string => item !== null); + + if (activeAssets.length === 0) { + return "fallback only"; + } + return `fallback + ${activeAssets.join(" + ")}`; +} + +export function errorMessage(error: unknown): string { + if (error instanceof Error) { + return error.message; + } + return String(error); +} + +export function cameraPresetForKey( + key: CameraPresetKey, + config: DemoAssetSceneConfig = DEMO_ASSET_SCENE_CONFIG +): ViewerCameraPreset { + return config.cameraPresets[key] ?? config.cameraPresets[config.defaultCameraPreset]; +} + +export function displayModeForControls(controls: ViewerAssetControls): ViewerDisplayMode { + if (controls.assetDebug) { + return "debug"; + } + if (controls.presentation) { + return "presentation"; + } + return "compact"; +} + +function parseBooleanParam(value: string | null, defaultValue: boolean): boolean { + if (value === null) { + return defaultValue; + } + const normalized = value.trim().toLowerCase(); + if (["0", "false", "off", "no"].includes(normalized)) { + return false; + } + if (["1", "true", "on", "yes"].includes(normalized)) { + return true; + } + return defaultValue; +} + +function parseCameraPresetParam( + value: string | null, + defaultValue: CameraPresetKey +): CameraPresetKey { + if ( + value === "presentation_default" || + value === "task_focus" || + value === "overview" || + value === "room_hero" + ) { + return value; + } + return defaultValue; +} diff --git a/frontend/src/components/viewer/useViewerAssetControls.ts b/frontend/src/components/viewer/useViewerAssetControls.ts new file mode 100644 index 0000000..c290c6c --- /dev/null +++ b/frontend/src/components/viewer/useViewerAssetControls.ts @@ -0,0 +1,146 @@ +import { useCallback, useEffect, useMemo, useState } from "react"; +import { + AssetKey, + AssetStatusUpdate, + CameraPresetKey, + DEMO_ASSET_SCENE_CONFIG, + ViewerAssetControls, + ViewerAssetStatuses, + 
ViewerDisplayMode, + cameraPresetForKey, + describeSceneMode, + displayModeForControls, + initialAssetStatuses, + parseViewerAssetControls, +} from "./assetSceneConfig"; + +export function useViewerAssetControls() { + const [controls, setControls] = useState(() => { + const search = typeof window === "undefined" ? "" : window.location.search; + return parseViewerAssetControls(search); + }); + const [cameraResetToken, setCameraResetToken] = useState(0); + const [copyStatus, setCopyStatus] = useState<"idle" | "copied" | "failed" | "unsupported">( + "idle" + ); + + const [statuses, setStatuses] = useState(() => + initialAssetStatuses(controls, DEMO_ASSET_SCENE_CONFIG) + ); + + useEffect(() => { + syncQueryParams(controls); + setCopyStatus("idle"); + }, [controls]); + + useEffect(() => { + setStatuses((current) => ({ + ...current, + table: { + ...current.table, + status: "deferred", + message: controls.table + ? "Table control is on, but GLB table integration is intentionally deferred." + : DEMO_ASSET_SCENE_CONFIG.table.note, + }, + })); + }, [controls.table]); + + const reportAssetStatus = useCallback((key: AssetKey, update: AssetStatusUpdate) => { + setStatuses((current) => ({ + ...current, + [key]: { + ...current[key], + status: update.status, + message: update.message, + error: update.error, + }, + })); + }, []); + + const setCameraPreset = useCallback((cameraPreset: CameraPresetKey) => { + setControls((current) => ({ + ...current, + cameraPreset, + })); + }, []); + + const setAssetEnabled = useCallback( + (key: Extract, enabled: boolean) => { + setControls((current) => ({ + ...current, + [key]: enabled, + })); + }, + [] + ); + + const setDisplayMode = useCallback((displayMode: ViewerDisplayMode) => { + setControls((current) => ({ + ...current, + assetDebug: displayMode === "debug", + presentation: displayMode === "presentation", + })); + }, []); + + const resetView = useCallback(() => { + setCameraResetToken((value) => value + 1); + }, []); + + const copyShareUrl 
= useCallback(async () => { + if (typeof window === "undefined" || !navigator.clipboard) { + setCopyStatus("unsupported"); + return; + } + try { + await navigator.clipboard.writeText(window.location.href); + setCopyStatus("copied"); + } catch { + setCopyStatus("failed"); + } + }, []); + + const sceneMode = useMemo(() => describeSceneMode(statuses), [statuses]); + const cameraPreset = useMemo( + () => cameraPresetForKey(controls.cameraPreset), + [controls.cameraPreset] + ); + const displayMode = useMemo(() => displayModeForControls(controls), [controls]); + const cameraPresets = useMemo(() => Object.values(DEMO_ASSET_SCENE_CONFIG.cameraPresets), []); + + return { + controls, + statuses, + sceneMode, + cameraPreset, + cameraPresets, + cameraResetToken, + displayMode, + copyStatus, + reportAssetStatus, + setCameraPreset, + setAssetEnabled, + setDisplayMode, + resetView, + copyShareUrl, + }; +} + +function syncQueryParams(controls: ViewerAssetControls) { + if (typeof window === "undefined") { + return; + } + + const url = new URL(window.location.href); + url.searchParams.set("hdri", controls.hdri ? "1" : "0"); + url.searchParams.set("heroRobot", controls.heroRobot ? "1" : "0"); + url.searchParams.set("table", controls.table ? "1" : "0"); + url.searchParams.set("assetDebug", controls.assetDebug ? "1" : "0"); + url.searchParams.set("presentation", controls.presentation ? 
"1" : "0"); + url.searchParams.set("cameraPreset", controls.cameraPreset); + window.history.replaceState( + window.history.state, + "", + `${url.pathname}${url.search}${url.hash}` + ); +} diff --git a/frontend/src/styles.css b/frontend/src/styles.css index 94e2458..5ee8170 100644 --- a/frontend/src/styles.css +++ b/frontend/src/styles.css @@ -30,6 +30,7 @@ body, color: var(--text); font-family: var(--font-sans); font-size: 13px; + line-height: 1.45; } button { @@ -206,11 +207,11 @@ textarea { } .section { - padding: 12px 14px; + padding: 14px 16px; border-bottom: 1px solid var(--border); display: flex; flex-direction: column; - gap: 8px; + gap: 10px; } .section h2 { margin: 0 0 4px 0; @@ -248,7 +249,7 @@ textarea { .kv { display: grid; grid-template-columns: 110px 1fr; - gap: 4px 8px; + gap: 6px 10px; font-size: 12px; } .kv .k { @@ -318,13 +319,69 @@ textarea { background: #0a0f17; border: 1px solid var(--border); border-radius: 4px; - padding: 8px; + padding: 10px; font-family: var(--font-mono); font-size: 11px; white-space: pre-wrap; word-break: break-word; max-height: 320px; overflow: auto; + line-height: 1.55; +} + +.data-access-card { + padding: 12px; + border: 1px solid rgba(94, 234, 212, 0.18); + border-radius: 8px; + background: + linear-gradient(135deg, rgba(20, 83, 91, 0.24), rgba(15, 23, 42, 0.36)), + rgba(10, 15, 23, 0.74); + color: var(--text); +} +.data-access-card p { + margin: 5px 0 10px; + color: var(--text-dim); + font-size: 12px; +} +.data-access-title { + font-size: 12px; + font-weight: 700; + color: #ccfbf1; +} +.artifact-callout { + display: grid; + grid-template-columns: minmax(0, 1fr) auto; + gap: 10px; + align-items: center; + margin-bottom: 8px; + padding: 8px; + border: 1px solid rgba(148, 163, 184, 0.2); + border-radius: 6px; + background: rgba(8, 13, 21, 0.52); +} +.artifact-label { + margin-bottom: 3px; + color: var(--accent); + font-size: 10px; + font-weight: 700; + letter-spacing: 0.06em; + text-transform: uppercase; +} 
+.artifact-path { + color: #e5e7eb; + font-size: 11px; + overflow-wrap: anywhere; +} +.copy-path-button { + white-space: nowrap; +} +.data-note { + margin-bottom: 8px; + padding: 8px 10px; + border: 1px solid rgba(148, 163, 184, 0.2); + border-radius: 6px; + background: rgba(15, 23, 42, 0.56); + color: var(--text-dim); } .prompt-warning { @@ -336,6 +393,26 @@ textarea { border-left: 2px solid var(--warn); } +.disclosure-stack { + display: flex; + flex-direction: column; + gap: 6px; +} +.disclosure { + padding: 8px 10px; + border: 1px solid rgba(148, 163, 184, 0.24); + border-radius: 6px; + background: rgba(15, 23, 42, 0.58); + color: var(--text-dim); + font-size: 11px; + line-height: 1.4; +} +.disclosure.warning { + border-color: rgba(245, 158, 11, 0.28); + background: rgba(245, 158, 11, 0.1); + color: var(--warn); +} + .backend-banner { padding: 6px 10px; border-radius: 4px; diff --git a/goal.md b/goal.md new file mode 100644 index 0000000..cc6f02c --- /dev/null +++ b/goal.md @@ -0,0 +1,1321 @@ +/goal You are building MedSim/SIMULAR into the strongest production-grade surgical robotics simulation, synthetic-data, autonomy-training, validation, and hospital-pilot evaluation platform possible from the current repository state. + +This is not a toy demo. This is not just a UI refresh. This is a full production hardening, physics-backend, autonomy-training, validation, deployment, and QMS-readiness build. + +PROJECT CONTEXT: +Repo root: /Users/mihirmodi/Documents/Simular/medsim +GitHub repo: wdwd720/SIMULAR +Primary product name: MedSim +Current branch: feat/workbench-visual-upgrade +Frontend: frontend/src +Backend/simulation: src/medsim + +Known current state: +- The runnable backend is currently a deterministic placeholder backend. +- SOFA exists only as a preview/skeleton path and must not be falsely described as implemented physics unless real runnable SOFA execution works. 
+- Existing platform includes typed scene/scenario configs, a narrow laparoscopic needle-passing/suturing primitive, deterministic perturbations, a prompt-driven FastAPI workbench, React/Vite/react-three-fiber frontend viewer, run/session manager, recorder/export, JSONL episode traces, replay validation, evaluation scaffolding, backend capability/compliance structure, and artifact storage under artifacts/runs/{run_id}/... +- Important likely files include: + - frontend/src/components/Viewer.tsx + - frontend/src/api.ts + - src/medsim/api/app.py + - src/medsim/workbench/*.py + - src/medsim/data/recorder.py + - src/medsim/data/schema.py + - src/medsim/eval/replay.py + - src/medsim/eval/metrics.py + - src/medsim/sim/placeholder_backend.py + - src/medsim/sim/models.py + - src/medsim/cli.py + +TOP-LEVEL PRODUCT TARGET: +Transform MedSim into a production-grade surgical robotics simulation and synthetic-data infrastructure platform for: +1. surgical robotics R&D, +2. high-fidelity simulation experiments, +3. synthetic dataset generation, +4. imitation-learning and reinforcement-learning dataset/export workflows, +5. autonomy-policy benchmarking in simulation, +6. supervised phantom/bench validation workflows, +7. medical simulation center / hospital innovation pilot evaluation, +8. research-lab deployment, +9. audit-grade reproducibility, +10. future regulated-product readiness. + +Correct positioning: +“MedSim is a production-grade surgical robotics simulation, synthetic-data, replay/evaluation, and supervised autonomy-training workbench for non-patient R&D, robotics labs, medical simulation centers, and hospital innovation pilots. It supports deterministic infrastructure-validation runs today and has a structured high-fidelity SOFA backend path for real physics.
Clinical use, patient-care use, live autonomous surgery, and regulated deployment require formal validation, domain-expert review, QMS controls, cybersecurity review, and regulatory clearance where applicable.” + +ABSOLUTE CLAIM BOUNDARY: +Do not claim: +- FDA-cleared +- clinically validated +- patient-care ready +- autonomous surgery ready +- hospital production approved +- physically perfect +- real SOFA physics complete unless actually executable +- safe for live robotic actuation on humans +- equivalent to a commercial surgical robot simulator unless benchmarked + +Allowed claims: +- production-grade infrastructure +- research/pilot-ready engineering foundation +- supervised non-patient pilot mode +- synthetic-data generation +- simulation and benchmark infrastructure +- traceable/replayable/evaluable runs +- high-fidelity backend pathway +- autonomy-training environment in simulation/phantom contexts +- QMS/regulatory-readiness artifacts, not certification + +IMPORTANT: +The goal is to build the maximum serious product spine. If real clinical/hospital/autonomous readiness requires external validation, leave gated artifacts and documentation, not fake claims. The code should enforce those boundaries through metadata, UI warnings, docs, and validation gates. + +FIRST ACTIONS: +1. Audit the repository. +2. Read README, pyproject/config files, frontend package files, backend API, sim backend models, placeholder backend, workbench modules, recorder/schema, replay/eval code, CLI code, and tests. +3. Run baseline checks before major changes: + - python -m pytest -q + - ruff check . + - mypy or pyright if configured + - npm install if needed + - npm test if configured + - npm run build if configured + - any existing project-specific commands +4. Create/update docs/PRODUCTION_BUILD_LOG.md immediately. +5. 
Record: + - baseline repo state + - baseline commands and results + - current backend capabilities + - current placeholder vs SOFA gap + - chosen architecture plan + - every completed checkpoint + - every command run + - every remaining known gap + +DO NOT: +- Delete or break the placeholder backend. +- Rewrite the entire repo blindly. +- Make unsupported regulatory/clinical claims. +- Fake passing tests. +- Fake SOFA execution. +- Hide missing dependencies. +- Silently skip failures. +- Produce vague TODO-only docs instead of real implementation. +- Stop after cosmetic UI changes. + +CORE ARCHITECTURE TO BUILD: + +================================================================================ +PHASE 1 — PRODUCTION BACKEND ABSTRACTION +================================================================================ + +Implement a clean simulation backend architecture. + +Create or upgrade: +- src/medsim/sim/base.py +- src/medsim/sim/backend_registry.py +- src/medsim/sim/capabilities.py +- src/medsim/sim/errors.py +- src/medsim/sim/runtime.py if useful + +BackendProtocol / abstract backend should support: +- backend name +- version +- health check +- capability reporting +- scenario validation +- run creation +- step execution if supported +- full episode execution +- artifact output +- replay compatibility metadata +- deterministic seed handling +- backend-specific limitations + +BackendCapabilities must include: +- name +- version +- available +- real_physics +- deterministic +- supports_contacts +- supports_tissue_deformation +- supports_tool_control +- supports_needle +- supports_replay +- supports_domain_randomization +- supports_policy_rollout +- supports_rl_env +- supports_gpu if known +- supports_fem +- supports_collision_forces +- supports_material_params +- supports_phantom_validation +- limitations +- missing_dependencies +- install_hint +- fidelity_level + +BackendHealth must include: +- backend +- status: available/unavailable/degraded +- checked_at +- 
dependency_versions +- errors +- warnings +- install_hint +- verification_command + +Fidelity levels: +- placeholder_deterministic +- sofa_unavailable_adapter_only +- sofa_minimal_unvalidated +- sofa_fem_experimental +- phantom_validated +- clinically_validated + +Rules: +- placeholder backend must report placeholder_deterministic. +- SOFA missing dependencies must report sofa_unavailable_adapter_only. +- SOFA executable minimal scene may report sofa_minimal_unvalidated. +- FEM tissue scene may report sofa_fem_experimental. +- phantom_validated is allowed only if actual phantom comparison artifacts exist. +- clinically_validated is blocked unless explicit clinical validation artifacts exist. Do not create this by default. + +Acceptance: +- Placeholder backend still runs. +- Backend registry lists placeholder and SOFA. +- Missing SOFA never breaks the app. +- Health/capability endpoints and CLI commands work. +- Tests cover registry, health, missing dependencies, placeholder execution. + +================================================================================ +PHASE 2 — PRESERVE AND STRENGTHEN PLACEHOLDER BACKEND +================================================================================ + +The placeholder backend is the CI-fast deterministic backend. Keep it working and improve it into a serious infrastructure-validation backend. + +Upgrade placeholder backend to: +- conform to BackendProtocol +- expose capabilities honestly +- generate complete artifacts +- support deterministic replay +- support synthetic contact/deformation placeholders clearly labeled as placeholder +- support scripted policy rollout +- support baseline autonomy benchmark +- support stable seed behavior +- output metrics and traces in the same schema as future SOFA runs + +Do not call placeholder outputs physical truth. 
+Every placeholder run artifact must contain: +- backend: placeholder +- fidelity_level: placeholder_deterministic +- physical_accuracy: false +- intended_use: infrastructure_validation + +Acceptance: +- deterministic repeat runs match expected hashes/tolerances +- replay validation works +- existing tests continue passing +- new tests prove artifact completeness + +================================================================================ +PHASE 3 — REAL SOFA BACKEND PATH +================================================================================ + +Upgrade the SOFA path from skeleton into a serious backend adapter. + +Create/upgrade: +- src/medsim/sim/sofa_backend.py +- src/medsim/sim/sofa_preflight.py +- src/medsim/sim/sofa_scene_builder.py +- src/medsim/sim/sofa_extractors.py +- src/medsim/sim/sofa_materials.py if needed +- tests for SOFA missing dependency behavior +- optional tests that run only when SOFA is installed + +SOFA requirements: +- Import SOFA/SofaPython3 only inside preflight/backend init, not at global package import. +- If SOFA is missing, return graceful unavailable health with exact install hint and verification commands. +- If SOFA exists, support at least one canonical minimal runnable scene. +- Build SOFA scene from MedSim scenario config. +- Support canonical surgical scene components: + - tissue object + - target points/rings + - needle object + - tool/gripper representation + - camera metadata + - solver params + - material params + - collision/contact configuration +- Expose extraction hooks for: + - tissue node/mesh state + - tool pose + - needle pose + - target state + - contact events + - solver status + - deformation summary +- Save SOFA artifacts under artifacts/runs/{run_id}/... +- Store solver/material params in artifact metadata. +- Include backend dependency versions in environment/provenance.
+ +If SOFA cannot actually run locally: +- implement adapter/preflight/scene builder as far as possible +- add tests for missing dependency behavior +- add optional tests guarded by SOFA availability +- document exact install/verification steps +- mark SOFA status honestly + +Acceptance: +- App does not crash without SOFA. +- CLI/API clearly report SOFA unavailable if missing. +- If SOFA is installed, canonical scene execution path exists. +- No fake successful SOFA run. +- Documentation explains what is real and what remains. + +================================================================================ +PHASE 4 — PHYSICS AND SCENARIO CONFIG SYSTEM +================================================================================ + +Upgrade scene/scenario schemas into production-grade typed contracts. + +Support: +- schema versioning +- tissue geometry +- tissue mesh references +- procedural fallback tissue geometry +- tissue material properties +- Young’s modulus +- Poisson ratio +- density +- damping +- friction +- contact stiffness +- solver params +- collision params +- needle geometry/material params +- laparoscopic tool geometry/control params +- camera params +- lighting/visual params +- perturbation distributions +- domain randomization +- task definitions +- success/failure criteria +- safety boundaries +- autonomy reward config +- dataset split metadata +- provenance metadata + +Create/upgrade: +- src/medsim/config/schema.py if applicable +- src/medsim/sim/materials.py +- src/medsim/sim/contact.py +- src/medsim/sim/instruments.py +- src/medsim/sim/tasks.py +- src/medsim/sim/scenario_validation.py + +Add config validation: +- medsim validate-config +- API endpoint for config validation +- frontend validation feedback + +Validation should catch: +- invalid ranges +- missing required fields +- unsupported backend features +- unsafe/autonomy-prohibited configs +- mismatched fidelity claims +- unknown schema versions + +Acceptance: +- configs validate +- 
invalid configs fail clearly +- old configs either migrate or fail with clear message +- tests cover validation + +================================================================================ +PHASE 5 — TRACE, RECORDER, DATASET, PROVENANCE SYSTEM +================================================================================ + +Upgrade all run outputs into a versioned audit-grade dataset contract. + +Every run directory: +artifacts/runs/{run_id}/ + run_summary.json + scenario_resolved.json + backend_capabilities.json + environment.json + provenance.json + metrics.json + validation.json + replay_validation.json + states.jsonl + events.jsonl + commands.jsonl + contacts.jsonl + observations.jsonl + actions.jsonl + rewards.jsonl + policy_rollout.json if autonomy used + logs.txt if available + artifact_manifest.json + checksums.json + +Each run must record: +- run_id +- scenario_id +- backend +- backend version +- fidelity_level +- seed +- config hash +- scenario hash +- git commit if available +- dirty git state if available +- timestamp +- host/platform info +- Python version +- dependency versions +- frontend/backend version if available +- deterministic/replay status +- validation status +- intended use +- disallowed use flags +- artifact checksums + +Dataset tools: +- medsim dataset-index artifacts/runs +- medsim export-dataset --runs artifacts/runs --out artifacts/datasets/ +- medsim validate-dataset artifacts/datasets/ +- medsim dataset-summary artifacts/runs + +Dataset export formats: +- generic JSONL +- imitation-learning episodes +- RL transition format +- metadata CSV +- manifest JSON +- optional Parquet if dependency exists or can be cleanly added + +Acceptance: +- one demo run produces complete artifact structure +- artifact manifest/checksums exist +- dataset index works +- dataset validation works +- tests cover schema roundtrip and artifact completeness + +================================================================================ +PHASE 6 — REPLAY
AND EVALUATION ENGINE +================================================================================ + +Upgrade replay/eval into a serious benchmark system. + +Replay modes: +- exact deterministic replay for placeholder +- tolerance-based replay for physics backends +- schema compatibility checking +- artifact completeness checking +- hash/checksum checking +- drift reporting + +Metrics: +- task_success +- task_phase_completion +- needle_target_position_error +- needle_orientation_error +- entry_point_error +- exit_point_error +- tissue_deformation_summary +- contact_count +- contact_duration +- max_contact_force if available +- mean_contact_force if available +- collision_count +- safety_violation_count +- tool_path_length +- trajectory_smoothness +- jerk_proxy +- command_saturation_count +- workspace_boundary_violations +- gripper_state_transitions +- time_to_completion +- replay_drift +- reproducibility_score +- autonomy_reward_total +- autonomy_success_rate +- policy_safety_score +- dataset_quality_score + +CLI: +- medsim replay-validate +- medsim run-eval --runs artifacts/runs --out artifacts/eval/{eval_id} +- medsim compare-runs +- medsim benchmark --scenario {scenario} --backend placeholder --episodes 10 +- medsim benchmark --scenario {scenario} --backend sofa --episodes 10 + +Eval output: +artifacts/eval/{eval_id}/ + eval_summary.json + eval_report.md + runs.csv + failures.json + metric_distributions.json + plots/ if feasible + +Acceptance: +- replay validation works on placeholder run +- eval report generation works +- benchmark works with placeholder backend +- tests cover key metrics + +================================================================================ +PHASE 7 — AUTONOMY TRAINING INFRASTRUCTURE +================================================================================ + +Build simulation-only autonomy training infrastructure.
+ +Create: +- src/medsim/autonomy/ +- src/medsim/autonomy/schemas.py +- src/medsim/autonomy/env.py +- src/medsim/autonomy/policies.py +- src/medsim/autonomy/rewards.py +- src/medsim/autonomy/safety.py +- src/medsim/autonomy/benchmark.py +- src/medsim/autonomy/export.py +- tests for autonomy modules + +Autonomy scope: +- simulation only +- phantom/bench only if hardware abstraction is later used +- no live patient use +- no autonomous real surgical actuation + +Observation schema: +- tool pose +- needle pose +- gripper state +- target pose +- target completion state +- tissue deformation summary +- contact summary +- safety state +- task phase +- previous command +- camera pose if available + +Action schema: +- tool delta position +- tool delta rotation +- gripper open/close +- needle alignment primitive +- task primitive command +- no-op +- emergency stop + +Reward schema: +- target distance reduction +- orientation improvement +- smoothness penalty +- collision penalty +- contact force penalty if available +- workspace violation penalty +- time penalty +- completion reward +- unsafe command penalty + +Safety shield: +- workspace bounds +- maximum command delta +- max velocity +- max rotation delta +- max gripper transitions +- invalid command rejection +- emergency stop state +- forbidden zone constraints +- unsafe contact escalation +- full logging of rejected actions + +Policies: +- random baseline +- scripted baseline +- heuristic needle-pass baseline +- replay-demonstration policy if traces exist + +Environment: +- Gymnasium-compatible wrapper if dependency exists or can be added cleanly +- otherwise implement internal Env API with reset/step/render/close +- do not hard require gymnasium if it complicates install +- provide optional dependency notes + +CLI: +- medsim autonomy benchmark --backend placeholder --scenario --episodes 10 +- medsim autonomy export-demos --runs artifacts/runs --out artifacts/datasets/demo_policy +- medsim autonomy evaluate-policy 
--policy scripted --scenario + +Artifacts: +- policy_rollout.json +- observations.jsonl +- actions.jsonl +- rewards.jsonl +- safety_events.jsonl +- autonomy_metrics.json + +Docs: +- docs/AUTONOMY_TRAINING.md + +Acceptance: +- scripted policy runs on placeholder backend +- benchmark produces metrics +- autonomy artifacts export +- safety shield rejects invalid commands +- tests cover env, safety, rewards, and baseline policy + +================================================================================ +PHASE 8 — SAFE ROBOT BENCH INTEGRATION ARCHITECTURE +================================================================================ + +Implement only safe abstraction and docs for bench/phantom robot use. + +Create: +- src/medsim/robotics/ +- src/medsim/robotics/interface.py +- src/medsim/robotics/dry_run.py +- src/medsim/robotics/safety.py +- src/medsim/robotics/calibration.py if useful + +Rules: +- No real robot actuation by default. +- Dry-run command preview only. +- Hardware adapters must be explicitly enabled. +- Add “NO PATIENT USE” metadata. +- Add emergency stop interface placeholder. +- Add command limits. +- Add calibration requirement schema. +- Add hardware safety checklist docs. +- Add tests for dry-run and safety limits. + +Docs: +- docs/ROBOT_BENCH_INTEGRATION.md + +Acceptance: +- dry-run robot command preview works +- safety limits enforced +- no real actuation code runs accidentally +- docs are clear + +================================================================================ +PHASE 9 — PHANTOM / BENCH VALIDATION WORKFLOW +================================================================================ + +Create validation workflow for comparing simulation against real phantom/bench data. 
+ +Create: +- src/medsim/validation/ +- src/medsim/validation/phantom.py +- src/medsim/validation/schemas.py +- src/medsim/validation/compare.py +- tests with synthetic fixture bench data + +Bench metadata schema: +- phantom material +- phantom geometry +- camera/tracker system +- calibration metadata +- operator +- instrument setup +- trial ID +- task type +- recorded trajectory +- target points +- observed deformation proxy +- observed contact timing if available +- notes +- data quality flags + +Comparison metrics: +- trajectory RMSE +- endpoint error +- needle pose error +- contact timing error +- deformation proxy error +- task success agreement +- timing difference +- safety event agreement + +CLI: +- medsim phantom compare --sim-run --bench-data --out artifacts/validation/ +- medsim phantom validate-fixture + +Docs: +- docs/PHANTOM_VALIDATION_PROTOCOL.md + +Acceptance: +- fixture bench comparison works +- report generated +- phantom_validated fidelity is blocked unless validation artifacts exist +- docs explain real validation path + +================================================================================ +PHASE 10 — FASTAPI PRODUCTION API +================================================================================ + +Upgrade the API into a serious production/pilot API. 
+ +Endpoints: +- GET /health +- GET /version +- GET /dependencies +- GET /backends +- GET /backends/{name}/health +- POST /scenarios/validate +- GET /scenarios +- POST /runs +- GET /runs +- GET /runs/{run_id} +- GET /runs/{run_id}/artifacts +- GET /runs/{run_id}/metrics +- GET /runs/{run_id}/events +- GET /runs/{run_id}/states +- POST /runs/{run_id}/replay/validate +- POST /eval +- POST /dataset/index +- POST /dataset/export +- POST /autonomy/benchmark +- POST /autonomy/export +- POST /validation/phantom/compare +- POST /validation/bundle/export +- GET /qms/traceability +- POST /qms/verify +- POST /workbench/prompt-to-scenario if already supported + +Production API features: +- strict Pydantic schemas +- structured errors +- safe artifact path lookup +- path traversal prevention +- request size limits +- timeout handling +- CORS via environment config +- optional API key auth for pilot deployments +- audit logging +- no secret logging +- run cancellation hook if feasible +- pagination/limits for large traces +- stable response formats + +Acceptance: +- API tests pass +- frontend can consume endpoints +- missing artifact paths fail safely +- invalid config errors are useful +- optional auth does not break local dev + +================================================================================ +PHASE 11 — FRONTEND PRODUCTION WORKBENCH +================================================================================ + +Upgrade React/Vite/react-three-fiber UI into a serious MedSim workbench. + +UI layout: +- left panel: scenario builder/config/prompt +- center: 3D viewer +- right panel: backend/fidelity/metrics/validation/artifacts +- bottom panel: timeline/events/replay/autonomy logs + +Required UI sections: +1. Backend Selector + - placeholder + - SOFA + - capability badges + - health status + - missing dependency warnings + - fidelity level display + +2. 
Scenario Builder + - YAML/JSON view + - common form controls + - material params + - solver params + - perturbation/domain randomization + - seed control + - validation results + +3. Run Control + - start run + - status + - replay + - validate replay + - export artifacts + - export validation bundle + +4. 3D Viewer + - tissue + - tools + - needle + - target rings/points + - trajectory traces + - contact markers + - safety boundary visualization + - camera controls + - high-quality surgical bay visuals labeled as non-authoritative decoration + +5. Metrics Panel + - task success + - needle error + - contact count + - deformation summary + - collision/safety violations + - replay status + - autonomy reward/success if applicable + +6. Autonomy Panel + - baseline policy selector + - benchmark start + - reward summary + - safety events + - policy rollout artifacts + - dataset export + +7. Validation Panel + - replay validation + - phantom comparison if available + - validation bundle export + - requirements traceability status + - QMS docs links if feasible + +8. Hospital Pilot Mode Panel + - supervised non-patient mode only + - risk acknowledgement + - fidelity disclosure + - operator/pilot metadata + - validation checklist + - exportable pilot bundle + +Mandatory UI warnings: +- Placeholder backend: + “Placeholder backend: deterministic infrastructure validation only, not biomechanical physics.” +- SOFA unavailable: + “SOFA backend unavailable: install/configure SOFA to run high-fidelity scenes.” +- Hospital pilot mode: + “Supervised non-patient pilot mode only. Not for patient-care decisions. Not validated for autonomous surgery.” +- Autonomy: + “Simulation/phantom autonomy training only. 
No live patient use.” + +Acceptance: +- frontend builds +- API client typed +- loading/error/empty states exist +- UI does not claim clinical approval +- UI exposes metrics/artifacts/validation/fidelity honestly + +================================================================================ +PHASE 12 — HOSPITAL PILOT MODE +================================================================================ + +Implement supervised non-patient hospital pilot mode. + +Hospital pilot mode means: +- no patient care +- no clinical decision support +- no live autonomous surgery +- supervised evaluation only +- simulation/phantom training only +- audit logging enabled +- validation bundle export enabled +- risk acknowledgement visible +- fidelity disclosure required + +Implement: +- pilot deployment config profile +- use_mode field: + - research + - training + - hospital_pilot_non_patient + - prohibited_clinical_use +- UI mode selector if appropriate +- API validation that blocks prohibited clinical use claims +- run metadata fields: + - operator_id optional + - institution optional + - reviewer optional + - approved_protocol_id optional + - use_mode + - risk_acknowledged + - intended_use +- artifact immutability/checksum metadata +- validation bundle export + +Docs: +- docs/HOSPITAL_PILOT_READINESS.md + +Acceptance: +- hospital pilot metadata appears in run artifacts +- warning appears in UI/API metadata +- validation bundle includes pilot metadata +- tests cover use mode validation + +================================================================================ +PHASE 13 — VALIDATION BUNDLE EXPORT +================================================================================ + +Add command: +- medsim export-validation-bundle --run --out artifacts/validation/ + +Bundle includes: +- run_summary.json +- scenario_resolved.json +- backend_capabilities.json +- environment.json +- provenance.json +- metrics.json +- validation.json +- replay_validation.json +- 
artifact_manifest.json +- checksums.json +- traceability snapshot +- risk disclosure +- fidelity disclosure +- software version/git commit +- dependency versions +- pilot metadata if present +- autonomy metrics if present +- phantom comparison if present +- markdown summary report + +Acceptance: +- validation bundle exports for a placeholder demo run +- checksums included +- missing pieces reported clearly +- tests cover bundle creation + +================================================================================ +PHASE 14 — QMS / REGULATORY READINESS ARTIFACTS +================================================================================ + +Create docs/qms/ with real useful docs, not empty filler. + +Files: +- docs/qms/PRODUCT_REQUIREMENTS.md +- docs/qms/SOFTWARE_REQUIREMENTS_SPEC.md +- docs/qms/REQUIREMENTS.yaml +- docs/qms/TRACEABILITY_MATRIX.md +- docs/qms/VERIFICATION_PLAN.md +- docs/qms/VERIFICATION_REPORT_TEMPLATE.md +- docs/qms/RISK_MANAGEMENT_PLAN.md +- docs/qms/RISK_REGISTER.md +- docs/qms/CHANGE_CONTROL.md +- docs/qms/RELEASE_CHECKLIST.md +- docs/qms/CYBERSECURITY_PLAN.md +- docs/qms/SBOM_NOTES.md +- docs/qms/DATA_GOVERNANCE.md +- docs/qms/CLINICAL_VALIDATION_PLAN.md +- docs/qms/REGULATORY_STRATEGY_NOTES.md +- docs/qms/INTENDED_USE_AND_CLAIMS.md + +Requirements YAML fields: +- id +- title +- description +- rationale +- risk_links +- verification_method +- verification_artifacts +- implementation_files +- tests +- status +- owner optional +- release_blocking boolean + +CLI: +- medsim qms trace +- medsim qms verify +- medsim qms export + +QMS verify should: +- load requirements +- check every release-blocking requirement has verification method +- check every critical risk has mitigation +- check every implemented feature has honest status if feasible +- output traceability report + +Acceptance: +- QMS docs exist and are meaningful +- qms verify works +- traceability report generated +- no false certification claims + 
+================================================================================ +PHASE 15 — CYBERSECURITY / SECURITY HARDENING +================================================================================ + +Implement practical security hardening. + +Required: +- path traversal prevention for artifact access +- input size limits for config/prompt upload +- CORS env config +- optional API key auth for pilot deployment +- structured audit logging +- no secrets in logs +- dependency/version endpoint +- safe error messages +- validation of file paths +- artifact root isolation +- SBOM generation notes or command if feasible +- security tests + +Environment variables: +- MEDSIM_ARTIFACT_ROOT +- MEDSIM_API_KEY optional +- MEDSIM_CORS_ORIGINS +- MEDSIM_RUN_TIMEOUT_SECONDS +- MEDSIM_MAX_CONFIG_BYTES +- MEDSIM_MODE + +Docs: +- docs/SECURITY.md or docs/qms/CYBERSECURITY_PLAN.md + +Acceptance: +- tests for unsafe paths +- optional auth works when configured +- local dev works without auth unless pilot mode requires it +- docs explain production settings + +================================================================================ +PHASE 16 — PACKAGING / DEPLOYMENT +================================================================================ + +Make it deployable. + +Add/upgrade: +- Dockerfile +- docker-compose.yml +- Makefile or Taskfile +- .env.example +- deployment docs +- install verification command +- demo script +- local dev quickstart + +Commands: +- make install +- make test +- make lint +- make typecheck if configured +- make dev-api +- make dev-frontend +- make build-frontend +- make demo-run +- make demo-autonomy +- make demo-eval +- make validation-bundle +- make verify + +If Makefile is not appropriate, add equivalent scripts documented in README. 
+ +Docs: +- docs/DEPLOYMENT.md +- docs/DEMO_SCRIPT.md + +Acceptance: +- fresh install path documented +- demo commands work or clearly state missing dependency +- Docker build attempted if feasible +- frontend production build passes + +================================================================================ +PHASE 17 — DOCUMENTATION OVERHAUL +================================================================================ + +Update/create: + +README.md: +- clear product description +- honest backend status +- install +- quickstart +- workbench launch +- placeholder demo +- SOFA backend path +- autonomy benchmark +- dataset export +- replay validation +- eval report +- validation bundle +- hospital pilot mode +- limitations +- disallowed claims + +docs/ARCHITECTURE.md: +- backend abstraction +- config schema +- trace schema +- eval stack +- autonomy stack +- UI/API architecture +- artifact lifecycle + +docs/SOFA_BACKEND.md: +- install +- verification +- what is implemented +- what is not +- how scene builder works +- fidelity levels +- next SOFA tasks + +docs/AUTONOMY_TRAINING.md: +- observation/action/reward +- policy interface +- safety shield +- benchmark commands +- dataset export +- simulation-only limitation + +docs/ROBOT_BENCH_INTEGRATION.md: +- dry-run only by default +- hardware abstraction +- calibration +- emergency stop expectations +- no patient use + +docs/PHANTOM_VALIDATION_PROTOCOL.md: +- phantom setup +- bench capture +- sim-vs-real metrics +- validation acceptance path + +docs/HOSPITAL_PILOT_READINESS.md: +- supervised non-patient use +- install checklist +- operator checklist +- validation checklist +- cybersecurity checklist +- disallowed uses + +docs/REGULATORY_BOUNDARY.md: +- intended use +- non-clinical limitations +- regulated claim triggers +- required review before clinical deployment + +docs/VALIDATION_PLAN.md: +- verification vs validation +- replay validation +- physics validation +- phantom validation +- domain expert review + 
+docs/MEDSIM_PRODUCT_POSITIONING.md: +- what MedSim is +- what MedSim is not +- safe pitch language +- unsafe claims to avoid + +ROADMAP_PRODUCTION.md: +- remaining gaps +- SOFA work +- validation work +- clinical/regulatory work +- commercialization work +- short-term next steps + +Acceptance: +- docs are not generic fluff +- docs match actual implementation status +- no false claims + +================================================================================ +PHASE 18 — FINAL DEMO PIPELINE +================================================================================ + +At the end, the repo must support a full demo sequence. + +Demo sequence should include: +1. backend health check +2. config validation +3. placeholder surgical run +4. replay validation +5. eval report +6. autonomy benchmark with scripted policy +7. dataset export +8. validation bundle export +9. frontend build +10. workbench launch instructions +11. SOFA health check +12. QMS traceability verification + +Add a script if feasible: +- scripts/demo_full_pipeline.sh +or CLI command: +- medsim demo full + +The demo must produce artifacts under: +- artifacts/runs/ +- artifacts/eval/ +- artifacts/datasets/ +- artifacts/validation/ + +Acceptance: +- demo pipeline works with placeholder backend +- SOFA part gracefully reports unavailable if missing +- output paths documented +- commands included in README and docs/DEMO_SCRIPT.md + +================================================================================ +PHASE 19 — TESTING AND QUALITY GATES +================================================================================ + +Tests to add/upgrade: +- backend registry tests +- placeholder backend protocol tests +- SOFA preflight tests +- missing dependency tests +- config validation tests +- material validation tests +- artifact path safety tests +- recorder output tests +- trace schema tests +- replay validation tests +- metric calculation tests +- dataset export tests +- autonomy env 
tests +- autonomy safety shield tests +- baseline policy tests +- phantom comparison fixture tests +- validation bundle tests +- QMS traceability tests +- API endpoint tests +- frontend build +- frontend tests if stack exists + +Commands to run: +- python -m pytest -q +- ruff check . +- mypy/pyright if configured +- npm test if configured +- npm run build +- demo pipeline command if implemented + +If something fails: +- fix it if reasonable +- if not fixable, document exact failure and reason in docs/PRODUCTION_BUILD_LOG.md +- do not claim success + +================================================================================ +PHASE 20 — FINAL ACCEPTANCE CRITERIA +================================================================================ + +The goal is complete only when: + +Backend: +- placeholder backend works +- backend protocol exists +- backend registry works +- SOFA backend path exists +- SOFA missing dependency behavior is graceful +- capabilities/health are accurate + +Physics: +- placeholder is honestly labeled +- SOFA is honestly labeled +- fidelity levels are enforced +- material/solver/contact schemas exist + +Data: +- complete artifact structure exists +- provenance exists +- checksums exist +- dataset indexing works +- dataset export works + +Replay/eval: +- replay validation works +- metrics are generated +- eval report works +- benchmark works + +Autonomy: +- autonomy module exists +- observation/action/reward schemas exist +- safety shield exists +- baseline scripted policy works +- benchmark artifacts export +- simulation-only boundary is enforced + +Hospital pilot: +- hospital pilot mode exists +- non-patient warnings exist +- pilot metadata exists +- validation bundle export works + +Validation: +- phantom validation scaffold exists +- fixture compare works +- validation docs exist +- phantom_validated is gated + +QMS: +- QMS docs exist +- requirements YAML exists +- traceability command works +- verification report works +- risk 
register exists + +Security: +- path traversal tests exist +- optional API key auth exists or documented +- CORS/env config exists +- no unsafe artifact serving + +API: +- health/backends/runs/eval/dataset/autonomy/validation/qms endpoints exist where appropriate +- API tests pass + +Frontend: +- frontend builds +- UI shows backend status +- UI shows fidelity +- UI shows metrics +- UI shows warnings +- UI shows artifacts +- UI supports validation bundle/export path +- UI does not make false claims + +Docs: +- README accurate +- architecture docs complete +- SOFA docs complete +- autonomy docs complete +- hospital pilot docs complete +- regulatory boundary docs complete +- production roadmap complete + +Final proof: +- tests/lint/build commands run +- demo commands documented +- docs/PRODUCTION_BUILD_LOG.md updated with final result +- ROADMAP_PRODUCTION.md updated with remaining real-world gaps + +================================================================================ +FINAL RESPONSE REQUIRED FROM CODEX +================================================================================ + +When finished, produce a concise but complete final report: + +1. What was built +2. Files changed +3. Commands run +4. Test/lint/build results +5. Demo commands +6. What is real now +7. What is still placeholder +8. SOFA status +9. Autonomy-training status +10. Hospital pilot readiness status +11. Validation/QMS status +12. Exact remaining gaps before actual medical-facility clinical use +13. Next five highest-leverage tasks + +Start now by auditing the repository, creating/updating docs/PRODUCTION_BUILD_LOG.md, running baseline tests/builds, then implementing the phases in order with tests after each major checkpoint. 
\ No newline at end of file diff --git a/src/medsim/api/app.py b/src/medsim/api/app.py index 1b4f4cd..a307405 100644 --- a/src/medsim/api/app.py +++ b/src/medsim/api/app.py @@ -22,19 +22,31 @@ from __future__ import annotations +import os from pathlib import Path from typing import Any -from fastapi import FastAPI, HTTPException +from fastapi import FastAPI, HTTPException, Request from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import JSONResponse from pydantic import BaseModel, ConfigDict, Field from medsim import __version__ +from medsim.autonomy.benchmark import run_autonomy_benchmark +from medsim.autonomy.export import export_demonstrations from medsim.config.loader import load_scenario_config, load_scene_config from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.data.dataset import dataset_index, export_dataset +from medsim.eval.runner import run_eval_report +from medsim.sim.backend_registry import get_backend_registry +from medsim.sim.errors import BackendNotFoundError from medsim.sim.placeholder_backend import PlaceholderBackend +from medsim.sim.scenario_validation import validate_config_payload from medsim.sim.sofa.availability import check_sofa_available from medsim.sim.sofa_backend import SofaBackend +from medsim.sim.sofa_preflight import run_sofa_preflight +from medsim.validation.bundle import export_validation_bundle +from medsim.validation.compare import compare_phantom_run from medsim.workbench.prompt import ( CompiledPromptResult, CompiledScenarioDraft, @@ -86,6 +98,59 @@ class ScenePreviewRequest(BaseModel): include_sofa_preview: bool = False +class ConfigValidationRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + config: dict[str, Any] + kind: str | None = None + backend: str | None = None + + +class EvalRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + runs: str + out: str + + +class DatasetIndexRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + runs: 
str + + +class DatasetExportRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + runs: str + out: str + + +class AutonomyBenchmarkRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + scenario: str + scene: str = "configs/base_scene.yaml" + backend: str = "placeholder" + policy: str = "scripted" + episodes: int = Field(default=1, ge=1, le=100) + out: str | None = None + + +class AutonomyExportRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + runs: str + out: str + + +class PhantomCompareRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + sim_run: str + bench_data: str + out: str + + +class ValidationBundleRequest(BaseModel): + model_config = ConfigDict(extra="forbid") + run: str + out: str + + def _load_scenario_library(scenario_dir: Path) -> dict[str, tuple[Path, ScenarioConfig]]: library: dict[str, tuple[Path, ScenarioConfig]] = {} if not scenario_dir.exists(): @@ -129,18 +194,83 @@ def create_app( ) app = FastAPI(title="medsim workbench", version=__version__) + api_key = os.environ.get("MEDSIM_API_KEY") + cors_origins = [ + origin.strip() + for origin in os.environ.get("MEDSIM_CORS_ORIGINS", "*").split(",") + if origin.strip() + ] app.add_middleware( CORSMiddleware, - allow_origins=["*"], + allow_origins=cors_origins or ["*"], allow_methods=["*"], allow_headers=["*"], ) app.state.workbench = state + @app.middleware("http") + async def api_key_middleware(request: Request, call_next): + public_paths = {"/health", "/api/health", "/version", "/api/version"} + if api_key and request.url.path not in public_paths: + if request.headers.get("x-api-key") != api_key: + return JSONResponse( + status_code=401, + content={"detail": "Invalid or missing API key."}, + ) + return await call_next(request) + @app.get("/api/health") + @app.get("/health") def health() -> dict[str, Any]: return {"status": "ok", "medsim_version": __version__} + @app.get("/version") + @app.get("/api/version") + def version() -> dict[str, Any]: + 
return {"medsim_version": __version__} + + @app.get("/dependencies") + @app.get("/api/dependencies") + def dependencies() -> dict[str, Any]: + sofa = check_sofa_available() + return { + "medsim_version": __version__, + "sofa": { + "available": sofa.available, + "module_name": sofa.module_name, + "version": sofa.version, + "error": sofa.error, + }, + } + + @app.get("/backends") + @app.get("/api/backends") + def backends() -> dict[str, Any]: + registry = get_backend_registry() + return { + "schema_version": "medsim.backends.v1", + "backends": [ + capabilities.model_dump(mode="json") + for capabilities in registry.list_capabilities() + ], + } + + @app.get("/backends/{name}/health") + @app.get("/api/backends/{name}/health") + def backend_health(name: str) -> dict[str, Any]: + try: + return get_backend_registry().health(name).model_dump(mode="json") + except BackendNotFoundError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + + @app.get("/backends/{name}/capabilities") + @app.get("/api/backends/{name}/capabilities") + def backend_capabilities(name: str) -> dict[str, Any]: + try: + return get_backend_registry().capabilities(name).model_dump(mode="json") + except BackendNotFoundError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + @app.get("/api/metadata") def metadata() -> dict[str, Any]: availability = check_sofa_available() @@ -212,6 +342,15 @@ def scene_preview(request: ScenePreviewRequest) -> dict[str, Any]: payload["sofa_preview"] = build_sofa_preview(scene_config, request.scenario) return payload + @app.post("/api/scenarios/validate") + @app.post("/scenarios/validate") + def validate_scenario_config(request: ConfigValidationRequest) -> dict[str, Any]: + return validate_config_payload( + request.config, + kind=request.kind, + backend_name=request.backend, + ).model_dump(mode="json") + @app.get("/api/scenarios/configs") def scenario_configs() -> list[dict[str, Any]]: return [ @@ -323,6 +462,24 @@ def 
get_run_events(run_id: str) -> list[dict[str, Any]]: raise HTTPException(status_code=404, detail=str(exc)) from exc return list(session.event_timeline) + @app.get("/api/runs/{run_id}/metrics") + @app.get("/runs/{run_id}/metrics") + def get_run_metrics(run_id: str) -> dict[str, Any]: + try: + session = manager.get(run_id) + except KeyError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + return session.summary.metrics_summary if session.summary else {} + + @app.get("/api/runs/{run_id}/states") + @app.get("/runs/{run_id}/states") + def get_run_states(run_id: str, limit: int = 100) -> list[dict[str, Any]]: + try: + session = manager.get(run_id) + except KeyError as exc: + raise HTTPException(status_code=404, detail=str(exc)) from exc + return [snapshot.state for snapshot in session.snapshots[: max(0, min(limit, 1000))]] + @app.get("/api/runs/{run_id}/artifacts") def get_run_artifacts(run_id: str) -> dict[str, Any]: try: @@ -337,6 +494,61 @@ def get_run_artifacts(run_id: str) -> dict[str, Any]: "status": session.status, } + @app.post("/api/eval") + @app.post("/eval") + def run_eval(request: EvalRequest) -> dict[str, Any]: + runs = _safe_artifact_path(request.runs) + out = _safe_artifact_path(request.out) + return run_eval_report(runs, out) + + @app.post("/api/dataset/index") + @app.post("/dataset/index") + def api_dataset_index(request: DatasetIndexRequest) -> dict[str, Any]: + return dataset_index(_safe_artifact_path(request.runs)) + + @app.post("/api/dataset/export") + @app.post("/dataset/export") + def api_dataset_export(request: DatasetExportRequest) -> dict[str, Any]: + return export_dataset(_safe_artifact_path(request.runs), _safe_artifact_path(request.out)) + + @app.post("/api/autonomy/benchmark") + @app.post("/autonomy/benchmark") + def api_autonomy_benchmark(request: AutonomyBenchmarkRequest) -> dict[str, Any]: + out = _safe_artifact_path(request.out) if request.out else None + return run_autonomy_benchmark( + 
scene_path=request.scene, + scenario_path=request.scenario, + backend_name=request.backend, + policy_name=request.policy, + episodes=request.episodes, + out_dir=out, + ) + + @app.post("/api/autonomy/export") + @app.post("/autonomy/export") + def api_autonomy_export(request: AutonomyExportRequest) -> dict[str, Any]: + return export_demonstrations( + _safe_artifact_path(request.runs), + _safe_artifact_path(request.out), + ) + + @app.post("/api/validation/phantom/compare") + @app.post("/validation/phantom/compare") + def api_phantom_compare(request: PhantomCompareRequest) -> dict[str, Any]: + return compare_phantom_run( + _safe_artifact_path(request.sim_run), + _safe_artifact_path(request.bench_data), + _safe_artifact_path(request.out), + ).model_dump(mode="json") + + @app.post("/api/validation/bundle/export") + @app.post("/validation/bundle/export") + def api_validation_bundle(request: ValidationBundleRequest) -> dict[str, Any]: + return export_validation_bundle( + _safe_artifact_path(request.run), + _safe_artifact_path(request.out), + ) + @app.post("/api/runs/{run_id}/advance") def advance_run(run_id: str, request: AdvanceRequest) -> dict[str, Any]: try: @@ -357,4 +569,31 @@ def cancel_run(run_id: str) -> dict[str, Any]: def sofa_preview(request: ScenePreviewRequest) -> dict[str, Any]: return build_sofa_preview(scene_config, request.scenario) + @app.get("/api/sofa/preflight") + @app.get("/sofa/preflight") + def sofa_preflight( + scenario_id: str = "normal", + attempt_runtime: bool = False, + ) -> dict[str, Any]: + if scenario_id not in state.scenario_library: + raise HTTPException(status_code=404, detail=f"Unknown scenario_id: {scenario_id}") + scenario = state.scenario_library[scenario_id][1] + return run_sofa_preflight( + scene_config, + scenario, + attempt_runtime=attempt_runtime, + ).model_dump(mode="json") + return app + + +def _safe_artifact_path(path: str | Path) -> Path: + candidate = Path(path) + if candidate.is_absolute(): + resolved = candidate.resolve() 
+ else: + resolved = (Path.cwd() / candidate).resolve() + root = Path.cwd().resolve() + if root not in (resolved, *resolved.parents): + raise HTTPException(status_code=400, detail="Artifact path escapes repository root.") + return resolved diff --git a/src/medsim/autonomy/__init__.py b/src/medsim/autonomy/__init__.py new file mode 100644 index 0000000..f5c5ff2 --- /dev/null +++ b/src/medsim/autonomy/__init__.py @@ -0,0 +1,14 @@ +"""Simulation-only autonomy training utilities.""" + +from medsim.autonomy.benchmark import run_autonomy_benchmark +from medsim.autonomy.env import AutonomyEnv +from medsim.autonomy.policies import RandomBaselinePolicy, ScriptedNeedlePassPolicy +from medsim.autonomy.safety import SafetyShield + +__all__ = [ + "AutonomyEnv", + "RandomBaselinePolicy", + "SafetyShield", + "ScriptedNeedlePassPolicy", + "run_autonomy_benchmark", +] diff --git a/src/medsim/autonomy/benchmark.py b/src/medsim/autonomy/benchmark.py new file mode 100644 index 0000000..1842187 --- /dev/null +++ b/src/medsim/autonomy/benchmark.py @@ -0,0 +1,106 @@ +"""Autonomy benchmark artifact generation.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from medsim.autonomy.env import AutonomyEnv +from medsim.autonomy.policies import RandomBaselinePolicy, ScriptedNeedlePassPolicy +from medsim.autonomy.schemas import AutonomyMetrics +from medsim.config.loader import load_scenario_config, load_scene_config +from medsim.data.exporters import write_json, write_jsonl +from medsim.sim.sofa_preflight import run_sofa_preflight +from medsim.utils.paths import create_run_dir, make_run_id + + +def run_autonomy_benchmark( + *, + scene_path: Path | str, + scenario_path: Path | str, + backend_name: str = "placeholder", + policy_name: str = "scripted", + episodes: int = 1, + out_dir: Path | str | None = None, +) -> dict[str, Any]: + """Run a simulation-only autonomy benchmark.""" + + scene = load_scene_config(scene_path) + scenario = 
load_scenario_config(scenario_path) + if backend_name != "placeholder": + report = run_sofa_preflight(scene, scenario, attempt_runtime=True).model_dump(mode="json") + report["autonomy_status"] = "unavailable" + return report + + run_dir = ( + Path(out_dir) + if out_dir + else create_run_dir("artifacts/runs", make_run_id("autonomy")) + ) + run_dir.mkdir(parents=True, exist_ok=True) + observations = [] + actions = [] + rewards = [] + safety_events = [] + rollout = [] + success_count = 0 + reward_total = 0.0 + for episode_index in range(episodes): + env = AutonomyEnv(scene) + policy = _policy(policy_name, scene, seed=episode_index) + observation = env.reset(seed=episode_index + 1, scenario=scenario) + done = False + while not done: + if isinstance(policy, ScriptedNeedlePassPolicy): + backend_action = policy.backend_action( + env.env.get_state(), + env.backend.get_observation(), + ) + next_observation, reward, done, info = env.step(backend_action) + action_payload = backend_action.to_dict() + else: + autonomy_action = policy.act(observation) + next_observation, reward, done, info = env.step(autonomy_action) + action_payload = autonomy_action.model_dump(mode="json") + row_base = { + "episode_index": episode_index, + "step_index": env.env.get_state().step_index, + } + observations.append({**row_base, **next_observation.model_dump(mode="json")}) + actions.append({**row_base, "action": action_payload}) + rewards.append({**row_base, **reward.model_dump(mode="json")}) + safety_events.extend(info["safety_events"]) + reward_total += reward.total + observation = next_observation + state = env.render() + if state.get("outcome") == "success": + success_count += 1 + rollout.append({"episode_index": episode_index, "terminal_state": state}) + env.close() + + metrics = AutonomyMetrics( + backend=backend_name, + policy=policy_name, + episodes=episodes, + success_count=success_count, + success_rate=success_count / episodes if episodes else 0.0, + reward_total=reward_total, + 
safety_event_count=len(safety_events), + ) + write_json(run_dir / "policy_rollout.json", {"episodes": rollout}) + write_jsonl(run_dir / "observations.jsonl", observations) + write_jsonl(run_dir / "actions.jsonl", actions) + write_jsonl(run_dir / "rewards.jsonl", rewards) + write_jsonl(run_dir / "safety_events.jsonl", safety_events) + write_json(run_dir / "autonomy_metrics.json", metrics) + return { + "schema_version": "medsim.autonomy_benchmark.v1", + "run_dir": str(run_dir), + "metrics": metrics.model_dump(mode="json"), + } + + +def _policy(policy_name: str, scene, seed: int): + if policy_name == "random": + return RandomBaselinePolicy(seed=seed) + return ScriptedNeedlePassPolicy(scene) diff --git a/src/medsim/autonomy/env.py b/src/medsim/autonomy/env.py new file mode 100644 index 0000000..12f8373 --- /dev/null +++ b/src/medsim/autonomy/env.py @@ -0,0 +1,92 @@ +"""Internal simulation-only autonomy environment.""" + +from __future__ import annotations + +from typing import Any + +from medsim.autonomy.rewards import compute_reward +from medsim.autonomy.safety import SafetyShield +from medsim.autonomy.schemas import ( + AutonomyAction, + AutonomyObservation, + RewardBreakdown, + SafetyEvent, +) +from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.sim.env import SurgicalEnv +from medsim.sim.models import Action, Observation +from medsim.sim.placeholder_backend import PlaceholderBackend + + +class AutonomyEnv: + """Reset/step environment for simulation-only autonomy experiments.""" + + def __init__(self, scene_config: SceneConfig) -> None: + self.scene_config = scene_config + self.backend = PlaceholderBackend() + self.env = SurgicalEnv(self.backend, scene_config) + self.safety = SafetyShield(scene_config) + self.previous_command: dict[str, Any] | None = None + self._last_observation: Observation | None = None + + def reset(self, seed: int, scenario: ScenarioConfig) -> AutonomyObservation: + observation = self.env.reset(seed=seed, 
scenario=scenario) + self._last_observation = observation + return self._autonomy_observation(observation) + + def step( + self, + action: AutonomyAction | Action, + ) -> tuple[AutonomyObservation, RewardBreakdown, bool, dict[str, Any]]: + state = self.env.get_state() + safety_events: list[SafetyEvent] = [] + if isinstance(action, Action): + backend_action = action + else: + backend_action, safety_events = self.safety.filter_action(action, state) + result = self.env.step(backend_action) + self.previous_command = backend_action.to_dict() + self._last_observation = result.observation + reward = compute_reward(result, unsafe_event_count=len(safety_events)) + info = { + "events": [event.to_dict() for event in result.events], + "safety_events": [event.model_dump(mode="json") for event in safety_events], + "outcome": result.outcome, + "failure_reason": result.failure_reason, + } + return self._autonomy_observation(result.observation), reward, result.done, info + + def render(self) -> dict[str, Any]: + return self.env.get_state().to_dict() + + def close(self) -> None: + self.env.close() + + def _autonomy_observation(self, observation: Observation) -> AutonomyObservation: + state = self.env.get_state() + contacts = sum(tool.collision_count for tool in state.tools.values()) + return AutonomyObservation( + tool_poses=observation.tool_poses, + needle_pose=observation.needle_pose, + gripper_state={ + tool_id: tool.grasping_needle for tool_id, tool in state.tools.items() + }, + target_centers=observation.target_centers, + target_completion_state=list(state.needle.passed_targets), + tissue_deformation_summary={ + "source": "placeholder_synthetic", + "physical_accuracy": False, + }, + contact_summary={ + "source": "placeholder_synthetic", + "physical_accuracy": False, + "contact_count": contacts, + }, + safety_state={ + "emergency_stopped": self.safety.emergency_stopped, + "no_patient_use": True, + }, + task_phase="complete" if state.done else "needle_passing", + 
previous_command=self.previous_command, + camera_quality=observation.camera_quality, + ) diff --git a/src/medsim/autonomy/export.py b/src/medsim/autonomy/export.py new file mode 100644 index 0000000..f6b43ae --- /dev/null +++ b/src/medsim/autonomy/export.py @@ -0,0 +1,35 @@ +"""Autonomy demonstration export.""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any + +from medsim.data.exporters import write_json, write_jsonl + + +def export_demonstrations(runs_root: Path | str, out_dir: Path | str) -> dict[str, Any]: + """Export autonomy action/observation rows from run artifacts.""" + + root = Path(runs_root) + out = Path(out_dir) + out.mkdir(parents=True, exist_ok=True) + rows = [] + for run_dir in sorted(root.glob("autonomy_*")) + sorted(root.glob("run_*")): + obs_path = run_dir / "observations.jsonl" + act_path = run_dir / "actions.jsonl" + if not obs_path.exists() or not act_path.exists(): + continue + observations = obs_path.read_text(encoding="utf-8").splitlines() + actions = act_path.read_text(encoding="utf-8").splitlines() + for observation, action in zip(observations, actions, strict=False): + rows.append({"observation": observation, "action": action, "run_id": run_dir.name}) + write_jsonl(out / "demo_policy.jsonl", rows) + manifest = { + "schema_version": "medsim.autonomy_demo_export.v1", + "row_count": len(rows), + "source_runs": str(root), + "format": "jsonl_string_pairs", + } + write_json(out / "manifest.json", manifest) + return manifest diff --git a/src/medsim/autonomy/policies.py b/src/medsim/autonomy/policies.py new file mode 100644 index 0000000..7649fb8 --- /dev/null +++ b/src/medsim/autonomy/policies.py @@ -0,0 +1,50 @@ +"""Baseline autonomy policies.""" + +from __future__ import annotations + +from random import Random + +from medsim.autonomy.schemas import AutonomyAction, AutonomyObservation +from medsim.config.models import SceneConfig +from medsim.sim.models import Observation +from medsim.sim.state import 
SimulationState +from medsim.tasks.needle_passing import NeedlePassingTask + + +class RandomBaselinePolicy: + """Small random baseline for simulation-only smoke benchmarks.""" + + name = "random" + + def __init__(self, seed: int = 0) -> None: + self._rng = Random(seed) + + def act(self, observation: AutonomyObservation) -> AutonomyAction: + del observation + return AutonomyAction( + tool_delta_position={ + "right_driver": [ + self._rng.uniform(-1.0, 1.0), + self._rng.uniform(-1.0, 1.0), + self._rng.uniform(-1.0, 1.0), + ] + }, + gripper="hold", + primitive="task_step", + ) + + +class ScriptedNeedlePassPolicy: + """Heuristic baseline backed by the existing deterministic task policy.""" + + name = "scripted" + + def __init__(self, scene_config: SceneConfig) -> None: + self.task = NeedlePassingTask.from_scene_config(scene_config) + + def backend_action(self, state: SimulationState, observation: Observation): + return self.task.scripted_action(state, observation) + + def act(self, observation: AutonomyObservation) -> AutonomyAction: + del observation + return AutonomyAction(primitive="task_step") diff --git a/src/medsim/autonomy/rewards.py b/src/medsim/autonomy/rewards.py new file mode 100644 index 0000000..497a320 --- /dev/null +++ b/src/medsim/autonomy/rewards.py @@ -0,0 +1,24 @@ +"""Reward calculation for simulation-only autonomy.""" + +from __future__ import annotations + +from medsim.autonomy.schemas import RewardBreakdown +from medsim.sim.models import StepResult + + +def compute_reward(result: StepResult, unsafe_event_count: int = 0) -> RewardBreakdown: + """Compute a conservative placeholder reward from step result metadata.""" + + collision_count = sum(tool.collision_count for tool in result.state.tools.values()) + completion = 10.0 if result.outcome == "success" else 0.0 + collision_penalty = float(collision_count) + unsafe_penalty = float(unsafe_event_count) + time_penalty = 0.01 + total = completion - collision_penalty - unsafe_penalty - time_penalty + 
return RewardBreakdown( + total=total, + collision_penalty=collision_penalty, + time_penalty=time_penalty, + completion_reward=completion, + unsafe_command_penalty=unsafe_penalty, + ) diff --git a/src/medsim/autonomy/safety.py b/src/medsim/autonomy/safety.py new file mode 100644 index 0000000..8e5a195 --- /dev/null +++ b/src/medsim/autonomy/safety.py @@ -0,0 +1,66 @@ +"""Simulation-only autonomy safety shield.""" + +from __future__ import annotations + +from medsim.autonomy.schemas import AutonomyAction, SafetyEvent +from medsim.config.models import SafetyBoundariesConfig, SceneConfig +from medsim.sim.models import Action +from medsim.sim.state import SimulationState + + +class SafetyShield: + """Reject unsafe autonomy commands before backend execution.""" + + def __init__(self, scene_config: SceneConfig) -> None: + self.scene_config = scene_config + self.config: SafetyBoundariesConfig = scene_config.safety_boundaries + self.emergency_stopped = False + + def filter_action( + self, + autonomy_action: AutonomyAction, + state: SimulationState, + ) -> tuple[Action, list[SafetyEvent]]: + """Return a backend action and any safety events.""" + + events: list[SafetyEvent] = [] + if self.emergency_stopped or autonomy_action.primitive == "emergency_stop": + self.emergency_stopped = True + return Action.no_op(), [ + SafetyEvent( + event_type="emergency_stop", + message="Emergency stop active; command converted to no-op.", + rejected=True, + ) + ] + + tool_deltas = {} + for tool_id, delta in autonomy_action.tool_delta_position.items(): + if tool_id not in state.tools: + events.append( + SafetyEvent( + event_type="unknown_tool", + message=f"Unknown tool command rejected: {tool_id}", + rejected=True, + ) + ) + continue + if any(abs(float(value)) > self.config.max_command_delta_mm for value in delta): + events.append( + SafetyEvent( + event_type="max_delta_exceeded", + message=f"Command for {tool_id} exceeds max delta.", + rejected=True, + metadata={"delta": list(delta)}, + ) + ) 
+ continue + tool_deltas[tool_id] = (float(delta[0]), float(delta[1]), float(delta[2])) + + if events: + return Action.no_op(), events + return Action( + tool_deltas=tool_deltas, + grasp=autonomy_action.gripper == "close", + release=autonomy_action.gripper == "open", + ), [] diff --git a/src/medsim/autonomy/schemas.py b/src/medsim/autonomy/schemas.py new file mode 100644 index 0000000..71ce12b --- /dev/null +++ b/src/medsim/autonomy/schemas.py @@ -0,0 +1,71 @@ +"""Autonomy observation, action, reward, and safety schemas.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any, Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class AutonomyModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + +class AutonomyObservation(AutonomyModel): + schema_version: str = "medsim.autonomy_observation.v1" + tool_poses: dict[str, list[float]] + needle_pose: list[float] + gripper_state: dict[str, bool] + target_centers: dict[str, list[float]] + target_completion_state: list[str] + tissue_deformation_summary: dict[str, Any] + contact_summary: dict[str, Any] + safety_state: dict[str, Any] + task_phase: str + previous_command: dict[str, Any] | None = None + camera_pose: list[float] | None = None + camera_quality: float + + +class AutonomyAction(AutonomyModel): + schema_version: str = "medsim.autonomy_action.v1" + tool_delta_position: dict[str, list[float]] = Field(default_factory=dict) + tool_delta_rotation: dict[str, list[float]] = Field(default_factory=dict) + gripper: Literal["open", "close", "hold"] = "hold" + primitive: Literal["needle_align", "task_step", "noop", "emergency_stop"] = "task_step" + + +class RewardBreakdown(AutonomyModel): + schema_version: str = "medsim.autonomy_reward.v1" + total: float + target_progress: float = 0.0 + orientation: float = 0.0 + smoothness_penalty: float = 0.0 + collision_penalty: float = 0.0 + workspace_violation_penalty: float = 0.0 + time_penalty: float = 0.0 + 
completion_reward: float = 0.0 + unsafe_command_penalty: float = 0.0 + + +class SafetyEvent(AutonomyModel): + schema_version: str = "medsim.autonomy_safety_event.v1" + timestamp_utc: datetime = Field(default_factory=lambda: datetime.now(UTC)) + event_type: str + message: str + rejected: bool + metadata: dict[str, Any] = Field(default_factory=dict) + + +class AutonomyMetrics(AutonomyModel): + schema_version: str = "medsim.autonomy_metrics.v1" + backend: str + policy: str + episodes: int + success_count: int + success_rate: float + reward_total: float + safety_event_count: int + simulation_only: bool = True + no_patient_use: bool = True diff --git a/src/medsim/cli.py b/src/medsim/cli.py index ad05ea9..7ffaa8d 100644 --- a/src/medsim/cli.py +++ b/src/medsim/cli.py @@ -3,40 +3,251 @@ from __future__ import annotations import argparse +import json from collections.abc import Sequence from pathlib import Path +from medsim.autonomy.benchmark import run_autonomy_benchmark +from medsim.autonomy.export import export_demonstrations from medsim.compliance.runner import run_backend_compliance from medsim.config.loader import load_scenario_config, load_scene_config from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.data.artifacts import write_run_artifact_bundle +from medsim.data.claims import artifact_claim_metadata +from medsim.data.dataset import ( + dataset_index, + dataset_summary, + export_dataset, + validate_dataset, +) from medsim.data.exporters import write_json from medsim.data.recorder import EpisodeRecorder from medsim.data.schema import EpisodeSummary from medsim.eval.metrics import aggregate_replay_validations, aggregate_run, aggregate_summaries from medsim.eval.replay import validate_replay, validate_run_replays +from medsim.eval.runner import compare_runs as compare_run_metrics +from medsim.eval.runner import run_eval_report from medsim.scenarios.generator import ScenarioGenerator from medsim.scenarios.perturbations import 
derive_perturbation_effects from medsim.sim.backend import SimulationBackend +from medsim.sim.backend_registry import get_backend_registry from medsim.sim.env import SurgicalEnv -from medsim.sim.placeholder_backend import PlaceholderBackend -from medsim.sim.sofa_backend import SofaBackend +from medsim.sim.errors import BackendNotFoundError +from medsim.sim.scenario_validation import validate_config_path +from medsim.sim.sofa_preflight import run_sofa_preflight from medsim.tasks.needle_passing import NeedlePassingTask from medsim.utils.paths import ( create_episode_dir, create_run_dir, + make_run_id, write_config_snapshot, write_run_manifest, ) +from medsim.validation.bundle import export_validation_bundle +from medsim.validation.compare import compare_phantom_run +from medsim.validation.phantom import synthetic_bench_fixture def build_backend(scene_config: SceneConfig) -> SimulationBackend: """Construct the backend selected by a scene config.""" - if scene_config.backend.name == "placeholder": - return PlaceholderBackend() - if scene_config.backend.name == "sofa": - return SofaBackend() - raise ValueError(f"Unsupported backend: {scene_config.backend.name}") + return get_backend_registry().create(scene_config.backend.name) + + +def backend_list() -> dict[str, object]: + """Return all registered backend capabilities.""" + + registry = get_backend_registry() + payload = { + "schema_version": "medsim.backends.v1", + "backends": [ + capabilities.model_dump(mode="json") + for capabilities in registry.list_capabilities() + ], + } + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def backend_health(backend_name: str) -> dict[str, object]: + """Return backend health.""" + + registry = get_backend_registry() + payload = registry.health(backend_name).model_dump(mode="json") + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def backend_capabilities(backend_name: str) -> dict[str, object]: + """Return backend 
capabilities.""" + + registry = get_backend_registry() + payload = registry.capabilities(backend_name).model_dump(mode="json") + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def sofa_preflight( + scene_path: Path | str, + scenario_path: Path | str, + *, + attempt_runtime: bool = False, +) -> dict[str, object]: + """Run SOFA preflight for one scene/scenario pair.""" + + scene_config = load_scene_config(scene_path) + scenario = load_scenario_config(scenario_path) + report = run_sofa_preflight( + scene_config, + scenario, + attempt_runtime=attempt_runtime, + ) + payload = report.model_dump(mode="json") + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def validate_config( + config_path: Path | str, + *, + kind: str | None = None, + backend_name: str | None = None, +) -> dict[str, object]: + """Validate one scene/scenario config file.""" + + report = validate_config_path(config_path, kind=kind, backend_name=backend_name) + payload = report.model_dump(mode="json") + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def dataset_index_command(runs_root: Path | str) -> dict[str, object]: + payload = dataset_index(runs_root) + write_json(Path(runs_root) / "dataset_index.json", payload) + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def dataset_summary_command(runs_root: Path | str) -> dict[str, object]: + payload = dataset_summary(runs_root) + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def export_dataset_command(runs_root: Path | str, out_dir: Path | str) -> dict[str, object]: + payload = export_dataset(runs_root, out_dir) + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def validate_dataset_command(dataset_dir: Path | str) -> dict[str, object]: + payload = validate_dataset(dataset_dir) + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def run_eval_command(runs_root: Path | str, 
out_dir: Path | str) -> dict[str, object]: + payload = run_eval_report(runs_root, out_dir) + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def compare_runs_command(run_a: Path | str, run_b: Path | str) -> dict[str, object]: + payload = compare_run_metrics(run_a, run_b) + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def benchmark_command( + scene_path: Path | str, + scenario_path: Path | str, + backend_name: str, + episodes: int, +) -> dict[str, object]: + """Run a benchmark with the requested backend.""" + + if backend_name != "placeholder": + payload = sofa_preflight(scene_path, scenario_path, attempt_runtime=True) + payload["benchmark_status"] = "unavailable" + return payload + run_dir = generate_dataset( + scene_path=scene_path, + scenario_paths=[scenario_path], + episodes=episodes, + seed=1, + ) + replay_validate(artifacts_dir=run_dir) + eval_dir = Path("artifacts/eval") / make_run_id("benchmark") + eval_summary = run_eval_report(run_dir, eval_dir) + payload = { + "schema_version": "medsim.benchmark.v1", + "backend": backend_name, + "scenario": str(scenario_path), + "episodes": episodes, + "run_dir": str(run_dir), + "eval_dir": str(eval_dir), + "eval_summary": eval_summary, + } + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def autonomy_benchmark_command( + scene_path: Path | str, + scenario_path: Path | str, + backend_name: str, + policy_name: str, + episodes: int, + out_dir: Path | None = None, +) -> dict[str, object]: + payload = run_autonomy_benchmark( + scene_path=scene_path, + scenario_path=scenario_path, + backend_name=backend_name, + policy_name=policy_name, + episodes=episodes, + out_dir=out_dir, + ) + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def autonomy_export_demos_command(runs_root: Path | str, out_dir: Path | str) -> dict[str, object]: + payload = export_demonstrations(runs_root, out_dir) + print(json.dumps(payload, indent=2, 
sort_keys=True)) + return payload + + +def phantom_compare_command( + sim_run: Path | str, + bench_data: Path | str, + out_dir: Path | str, +) -> dict[str, object]: + report = compare_phantom_run(sim_run, bench_data, out_dir) + payload = report.model_dump(mode="json") + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def phantom_validate_fixture_command(out_dir: Path | str) -> dict[str, object]: + out = Path(out_dir) + out.mkdir(parents=True, exist_ok=True) + fixture = synthetic_bench_fixture() + write_json(out / "synthetic_bench_fixture.json", fixture) + payload = { + "schema_version": "medsim.phantom_fixture_validation.v1", + "valid": True, + "fixture": str(out / "synthetic_bench_fixture.json"), + "phantom_validated_claim_allowed": False, + } + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload + + +def export_validation_bundle_command(run_dir: Path | str, out_dir: Path | str) -> dict[str, object]: + payload = export_validation_bundle(run_dir, out_dir) + print(json.dumps(payload, indent=2, sort_keys=True)) + return payload def run_episode( @@ -50,6 +261,7 @@ def run_episode( ) -> EpisodeSummary: """Run and export one episode.""" + claims = artifact_claim_metadata(scene_config.backend.name) task = NeedlePassingTask.from_scene_config(scene_config) effects = derive_perturbation_effects(scenario, seed) recorder = EpisodeRecorder( @@ -59,6 +271,7 @@ def run_episode( output_dir=run_dir, perturbation_effects=effects, config_snapshot=config_snapshot, + artifact_metadata=claims, ) result = env.run_episode( seed=seed, @@ -104,8 +317,12 @@ def run_single_scene( ) finally: env.close() - aggregate = aggregate_summaries([summary]) + aggregate = { + **artifact_claim_metadata(scene_config.backend.name), + **aggregate_summaries([summary]), + } write_json(run_dir / "aggregate_metrics.json", aggregate) + write_run_artifact_bundle(run_dir, scene_config, [scenario], [summary], aggregate) print( f"wrote {run_dir} | 
scenario={summary.scenario_id} " f"outcome={summary.outcome_label} steps={summary.steps}" @@ -154,8 +371,12 @@ def generate_dataset( finally: env.close() - aggregate = aggregate_summaries(summaries) + aggregate = { + **artifact_claim_metadata(scene_config.backend.name), + **aggregate_summaries(summaries), + } write_json(run_dir / "aggregate_metrics.json", aggregate) + write_run_artifact_bundle(run_dir, scene_config, scenarios, summaries, aggregate) print( f"wrote {run_dir} | episodes={aggregate['episode_count']} " f"success_rate={aggregate['success_rate']:.3f}" @@ -205,6 +426,18 @@ def replay_validate( aggregate = aggregate_replay_validations(results) payload = { "schema_version": "medsim.replay_validation_aggregate.v1", + **( + { + "backend": results[0].backend, + "backend_version": results[0].backend_version, + "fidelity_level": results[0].fidelity_level, + "physical_accuracy": results[0].physical_accuracy, + "intended_use": results[0].intended_use, + "disallowed_uses": results[0].disallowed_uses, + } + if results + else artifact_claim_metadata("placeholder") + ), "aggregate": aggregate, "results": [result.model_dump(mode="json") for result in results], } @@ -259,15 +492,57 @@ def _build_parser() -> argparse.ArgumentParser: dataset_parser.add_argument("--seed", required=True, type=int) dataset_parser.add_argument("--output-root", type=Path, default=None) + dataset_index_parser = subparsers.add_parser("dataset-index", help="index run artifacts") + dataset_index_parser.add_argument("runs_root", type=Path) + + dataset_summary_parser = subparsers.add_parser("dataset-summary", help="summarize runs") + dataset_summary_parser.add_argument("runs_root", type=Path) + + export_dataset_parser = subparsers.add_parser("export-dataset", help="export dataset") + export_dataset_parser.add_argument("--runs", required=True, type=Path) + export_dataset_parser.add_argument("--out", required=True, type=Path) + + validate_dataset_parser = subparsers.add_parser( + "validate-dataset", + 
help="validate exported dataset", + ) + validate_dataset_parser.add_argument("dataset_dir", type=Path) + + bundle_parser = subparsers.add_parser( + "export-validation-bundle", + help="export validation bundle for one run", + ) + bundle_parser.add_argument("--run", required=True, type=Path) + bundle_parser.add_argument("--out", required=True, type=Path) + eval_parser = subparsers.add_parser("eval", help="evaluate a generated run") eval_parser.add_argument("--artifacts", required=True, type=Path) + run_eval_parser = subparsers.add_parser("run-eval", help="write eval report artifacts") + run_eval_parser.add_argument("--runs", required=True, type=Path) + run_eval_parser.add_argument("--out", required=True, type=Path) + + compare_parser = subparsers.add_parser("compare-runs", help="compare two runs") + compare_parser.add_argument("run_a", type=Path) + compare_parser.add_argument("run_b", type=Path) + + benchmark_parser = subparsers.add_parser("benchmark", help="run backend benchmark") + benchmark_parser.add_argument("--scene", type=Path, default=Path("configs/base_scene.yaml")) + benchmark_parser.add_argument("--scenario", required=True, type=Path) + benchmark_parser.add_argument("--backend", required=True, choices=["placeholder", "sofa"]) + benchmark_parser.add_argument("--episodes", required=True, type=int) + replay_parser = subparsers.add_parser("replay-validate", help="validate deterministic replay") replay_group = replay_parser.add_mutually_exclusive_group(required=True) replay_group.add_argument("--artifacts", type=Path) replay_group.add_argument("--summary", type=Path) replay_parser.add_argument("--tolerance", type=float, default=1e-9) + validate_parser = subparsers.add_parser("validate-config", help="validate a config file") + validate_parser.add_argument("path", type=Path) + validate_parser.add_argument("--kind", choices=["scene", "scenario"], default=None) + validate_parser.add_argument("--backend", default=None) + contract_parser = subparsers.add_parser( 
"check-backend-contract", help="run backend contract compliance checks", @@ -277,6 +552,88 @@ def _build_parser() -> argparse.ArgumentParser: contract_parser.add_argument("--scenarios", required=True, nargs="+", type=Path) contract_parser.add_argument("--output-root", type=Path, default=Path("artifacts/compliance")) + backends_parser = subparsers.add_parser("backends", help="inspect backend registry") + backend_subparsers = backends_parser.add_subparsers(dest="backends_command", required=True) + backend_subparsers.add_parser("list", help="list registered backends and capabilities") + backend_health_parser = backend_subparsers.add_parser("health", help="show backend health") + backend_health_parser.add_argument("backend") + backend_capabilities_parser = backend_subparsers.add_parser( + "capabilities", + help="show backend capabilities", + ) + backend_capabilities_parser.add_argument("backend") + + sofa_parser = subparsers.add_parser("sofa", help="inspect SOFA backend adapter") + sofa_subparsers = sofa_parser.add_subparsers(dest="sofa_command", required=True) + sofa_preflight_parser = sofa_subparsers.add_parser("preflight", help="run SOFA preflight") + sofa_preflight_parser.add_argument( + "--scene", + type=Path, + default=Path("configs/base_scene.yaml"), + ) + sofa_preflight_parser.add_argument( + "--scenario", + type=Path, + default=Path("configs/scenarios/normal.yaml"), + ) + sofa_preflight_parser.add_argument("--attempt-runtime", action="store_true") + + autonomy_parser = subparsers.add_parser("autonomy", help="simulation-only autonomy tools") + autonomy_subparsers = autonomy_parser.add_subparsers(dest="autonomy_command", required=True) + autonomy_benchmark_parser = autonomy_subparsers.add_parser( + "benchmark", + help="run autonomy benchmark", + ) + autonomy_benchmark_parser.add_argument( + "--scene", + type=Path, + default=Path("configs/base_scene.yaml"), + ) + autonomy_benchmark_parser.add_argument("--scenario", required=True, type=Path) + 
autonomy_benchmark_parser.add_argument("--backend", default="placeholder") + autonomy_benchmark_parser.add_argument( + "--policy", + default="scripted", + choices=["scripted", "random"], + ) + autonomy_benchmark_parser.add_argument("--episodes", type=int, default=1) + autonomy_benchmark_parser.add_argument("--out", type=Path, default=None) + + autonomy_export_parser = autonomy_subparsers.add_parser( + "export-demos", + help="export autonomy demonstrations", + ) + autonomy_export_parser.add_argument("--runs", required=True, type=Path) + autonomy_export_parser.add_argument("--out", required=True, type=Path) + + autonomy_eval_parser = autonomy_subparsers.add_parser( + "evaluate-policy", + help="evaluate a baseline policy", + ) + autonomy_eval_parser.add_argument( + "--scene", + type=Path, + default=Path("configs/base_scene.yaml"), + ) + autonomy_eval_parser.add_argument("--scenario", required=True, type=Path) + autonomy_eval_parser.add_argument( + "--policy", + default="scripted", + choices=["scripted", "random"], + ) + + phantom_parser = subparsers.add_parser("phantom", help="phantom validation tools") + phantom_subparsers = phantom_parser.add_subparsers(dest="phantom_command", required=True) + phantom_compare_parser = phantom_subparsers.add_parser("compare", help="compare sim to bench") + phantom_compare_parser.add_argument("--sim-run", required=True, type=Path) + phantom_compare_parser.add_argument("--bench-data", required=True, type=Path) + phantom_compare_parser.add_argument("--out", required=True, type=Path) + phantom_fixture_parser = phantom_subparsers.add_parser( + "validate-fixture", + help="write and validate synthetic bench fixture", + ) + phantom_fixture_parser.add_argument("--out", type=Path, default=Path("artifacts/validation")) + return parser @@ -301,14 +658,45 @@ def main(argv: Sequence[str] | None = None) -> None: seed=args.seed, output_root=args.output_root, ) + elif args.command == "dataset-index": + dataset_index_command(args.runs_root) + elif 
args.command == "dataset-summary": + dataset_summary_command(args.runs_root) + elif args.command == "export-dataset": + export_dataset_command(args.runs, args.out) + elif args.command == "validate-dataset": + payload = validate_dataset_command(args.dataset_dir) + if not payload["valid"]: + raise SystemExit(1) + elif args.command == "export-validation-bundle": + export_validation_bundle_command(args.run, args.out) elif args.command == "eval": evaluate_run(args.artifacts) + elif args.command == "run-eval": + run_eval_command(args.runs, args.out) + elif args.command == "compare-runs": + compare_runs_command(args.run_a, args.run_b) + elif args.command == "benchmark": + benchmark_command( + scene_path=args.scene, + scenario_path=args.scenario, + backend_name=args.backend, + episodes=args.episodes, + ) elif args.command == "replay-validate": replay_validate( artifacts_dir=args.artifacts, summary_path=args.summary, tolerance=args.tolerance, ) + elif args.command == "validate-config": + payload = validate_config( + config_path=args.path, + kind=args.kind, + backend_name=args.backend, + ) + if not payload["valid"]: + raise SystemExit(1) elif args.command == "check-backend-contract": check_backend_contract( backend_name=args.backend, @@ -316,6 +704,56 @@ def main(argv: Sequence[str] | None = None) -> None: scenario_paths=args.scenarios, output_root=args.output_root, ) + elif args.command == "backends": + try: + if args.backends_command == "list": + backend_list() + elif args.backends_command == "health": + backend_health(args.backend) + elif args.backends_command == "capabilities": + backend_capabilities(args.backend) + else: + parser.error(f"Unknown backends command: {args.backends_command}") + except BackendNotFoundError as exc: + parser.error(str(exc)) + elif args.command == "sofa": + if args.sofa_command == "preflight": + sofa_preflight( + scene_path=args.scene, + scenario_path=args.scenario, + attempt_runtime=args.attempt_runtime, + ) + else: + parser.error(f"Unknown 
sofa command: {args.sofa_command}") + elif args.command == "autonomy": + if args.autonomy_command == "benchmark": + autonomy_benchmark_command( + scene_path=args.scene, + scenario_path=args.scenario, + backend_name=args.backend, + policy_name=args.policy, + episodes=args.episodes, + out_dir=args.out, + ) + elif args.autonomy_command == "export-demos": + autonomy_export_demos_command(args.runs, args.out) + elif args.autonomy_command == "evaluate-policy": + autonomy_benchmark_command( + scene_path=args.scene, + scenario_path=args.scenario, + backend_name="placeholder", + policy_name=args.policy, + episodes=1, + ) + else: + parser.error(f"Unknown autonomy command: {args.autonomy_command}") + elif args.command == "phantom": + if args.phantom_command == "compare": + phantom_compare_command(args.sim_run, args.bench_data, args.out) + elif args.phantom_command == "validate-fixture": + phantom_validate_fixture_command(args.out) + else: + parser.error(f"Unknown phantom command: {args.phantom_command}") else: parser.error(f"Unknown command: {args.command}") diff --git a/src/medsim/config/models.py b/src/medsim/config/models.py index db5f4cf..6cf6dab 100644 --- a/src/medsim/config/models.py +++ b/src/medsim/config/models.py @@ -11,6 +11,12 @@ Pose6 = Annotated[list[float], Field(min_length=6, max_length=6)] BackendName = Literal["placeholder", "sofa"] +UseMode = Literal[ + "research", + "training", + "hospital_pilot_non_patient", + "prohibited_clinical_use", +] PerturbationKind = Literal[ "camera_occlusion", "visibility_degradation", @@ -35,6 +41,143 @@ class BackendConfig(StrictConfigModel): deterministic_noise_mm: float = Field(default=0.0, ge=0) +class TissueGeometryConfig(StrictConfigModel): + """Tissue geometry and mesh references.""" + + representation: Literal["procedural_patch", "mesh"] = "procedural_patch" + dimensions_mm: Vec3 | None = None + mesh_path: str | None = None + procedural_fallback: bool = True + + +class MaterialPropertiesConfig(StrictConfigModel): + 
"""Soft-body material parameters.""" + + young_modulus_pa: float = Field(default=50_000.0, gt=0) + poisson_ratio: float = Field(default=0.45, ge=0.0, lt=0.5) + density_kg_m3: float = Field(default=1_050.0, gt=0) + damping: float = Field(default=0.05, ge=0) + friction: float = Field(default=0.3, ge=0) + contact_stiffness: float = Field(default=1_000.0, gt=0) + validation_status: Literal["unvalidated_default", "phantom_fit", "clinical_validated"] = ( + "unvalidated_default" + ) + + +class SolverParamsConfig(StrictConfigModel): + """Numerical solver parameters for future physics backends.""" + + dt_s: float = Field(default=0.01, gt=0) + iterations: int = Field(default=25, gt=0) + tolerance: float = Field(default=1e-5, gt=0) + solver_type: str = "generic_implicit" + + +class CollisionParamsConfig(StrictConfigModel): + """Collision/contact configuration.""" + + enabled: bool = True + contact_distance_mm: float = Field(default=1.0, ge=0) + alarm_distance_mm: float = Field(default=3.0, ge=0) + friction: float = Field(default=0.3, ge=0) + stiffness: float = Field(default=1_000.0, gt=0) + + +class NeedleGeometryMaterialConfig(StrictConfigModel): + """Needle geometry/material parameters.""" + + radius_mm: float = Field(default=0.35, gt=0) + arc_degrees: float = Field(default=180.0, gt=0, le=360) + material: Literal["stainless_steel_placeholder", "custom"] = "stainless_steel_placeholder" + flexible: bool = False + + +class ToolControlParamsConfig(StrictConfigModel): + """Laparoscopic tool geometry/control parameters.""" + + jaw_length_mm: float = Field(default=8.0, gt=0) + shaft_diameter_mm: float = Field(default=5.0, gt=0) + max_delta_mm: float = Field(default=4.0, gt=0) + max_rotation_deg: float = Field(default=10.0, gt=0) + gripper_binary: bool = True + + +class LightingVisualParamsConfig(StrictConfigModel): + """Camera lighting and visual metadata.""" + + environment: str = "procedural_operating_room" + intensity: float = Field(default=1.0, ge=0) + 
decoration_non_authoritative: bool = True + + +class DomainRandomizationConfig(StrictConfigModel): + """Domain-randomization and perturbation distribution metadata.""" + + enabled: bool = False + seed: int | None = None + distributions: dict[str, Any] = Field(default_factory=dict) + + +class SuccessCriteriaConfig(StrictConfigModel): + """Task success/failure criteria metadata.""" + + require_all_targets: bool = True + max_collisions: int | None = None + max_workspace_violations: int = 0 + + +class SafetyBoundariesConfig(StrictConfigModel): + """Safety boundaries for simulation/autonomy commands.""" + + workspace_min: Vec3 | None = None + workspace_max: Vec3 | None = None + max_command_delta_mm: float = Field(default=4.0, gt=0) + max_rotation_delta_deg: float = Field(default=10.0, gt=0) + prohibit_patient_use: bool = True + + +class AutonomyRewardConfig(StrictConfigModel): + """Reward terms for simulation-only autonomy training.""" + + target_distance_weight: float = 1.0 + orientation_weight: float = 0.25 + smoothness_penalty: float = 0.05 + collision_penalty: float = 1.0 + workspace_violation_penalty: float = 1.0 + completion_reward: float = 10.0 + + +class DatasetSplitMetadataConfig(StrictConfigModel): + """Dataset split metadata.""" + + split: Literal["train", "validation", "test", "unspecified"] = "unspecified" + fold: int | None = Field(default=None, ge=0) + + +class ProvenanceMetadataConfig(StrictConfigModel): + """Freeform provenance metadata for runs and datasets.""" + + author: str | None = None + source: str | None = None + notes: str | None = None + + +class PilotMetadataConfig(StrictConfigModel): + """Supervised non-patient pilot metadata.""" + + use_mode: UseMode = "research" + risk_acknowledged: bool = False + operator_id: str | None = None + institution: str | None = None + reviewer: str | None = None + approved_protocol_id: str | None = None + intended_use: str = "non_patient_research" + warning: str = ( + "Supervised non-patient pilot mode only. 
Not for patient-care decisions. " + "Not validated for autonomous surgery." + ) + + class TargetZoneConfig(StrictConfigModel): """Geometric target zone for the first needle passing task.""" @@ -122,6 +265,18 @@ class SceneConfig(StrictConfigModel): camera: CameraConfig recorder: RecorderConfig evaluation: EvaluationConfig + tissue_geometry: TissueGeometryConfig = Field(default_factory=TissueGeometryConfig) + tissue_material: MaterialPropertiesConfig = Field(default_factory=MaterialPropertiesConfig) + solver: SolverParamsConfig = Field(default_factory=SolverParamsConfig) + collision: CollisionParamsConfig = Field(default_factory=CollisionParamsConfig) + needle: NeedleGeometryMaterialConfig = Field(default_factory=NeedleGeometryMaterialConfig) + tool_control: ToolControlParamsConfig = Field(default_factory=ToolControlParamsConfig) + lighting: LightingVisualParamsConfig = Field(default_factory=LightingVisualParamsConfig) + safety_boundaries: SafetyBoundariesConfig = Field(default_factory=SafetyBoundariesConfig) + autonomy_reward: AutonomyRewardConfig = Field(default_factory=AutonomyRewardConfig) + dataset_split: DatasetSplitMetadataConfig = Field(default_factory=DatasetSplitMetadataConfig) + provenance: ProvenanceMetadataConfig = Field(default_factory=ProvenanceMetadataConfig) + pilot: PilotMetadataConfig = Field(default_factory=PilotMetadataConfig) @field_validator("tools") @classmethod @@ -149,3 +304,11 @@ class ScenarioConfig(StrictConfigModel): description: str = "" tags: list[str] = Field(default_factory=list) perturbations: list[PerturbationConfig] = Field(default_factory=list) + domain_randomization: DomainRandomizationConfig = Field( + default_factory=DomainRandomizationConfig + ) + success_criteria: SuccessCriteriaConfig = Field(default_factory=SuccessCriteriaConfig) + safety_boundaries: SafetyBoundariesConfig = Field(default_factory=SafetyBoundariesConfig) + autonomy_reward: AutonomyRewardConfig = Field(default_factory=AutonomyRewardConfig) + dataset_split: 
DatasetSplitMetadataConfig = Field(default_factory=DatasetSplitMetadataConfig) + provenance: ProvenanceMetadataConfig = Field(default_factory=ProvenanceMetadataConfig) diff --git a/src/medsim/config/schema.py b/src/medsim/config/schema.py new file mode 100644 index 0000000..49d6208 --- /dev/null +++ b/src/medsim/config/schema.py @@ -0,0 +1,41 @@ +"""Production config schema exports.""" + +from __future__ import annotations + +from medsim.config.models import ( + AutonomyRewardConfig, + CollisionParamsConfig, + DatasetSplitMetadataConfig, + DomainRandomizationConfig, + LightingVisualParamsConfig, + MaterialPropertiesConfig, + NeedleGeometryMaterialConfig, + PilotMetadataConfig, + ProvenanceMetadataConfig, + SafetyBoundariesConfig, + ScenarioConfig, + SceneConfig, + SolverParamsConfig, + SuccessCriteriaConfig, + TissueGeometryConfig, + ToolControlParamsConfig, +) + +__all__ = [ + "AutonomyRewardConfig", + "CollisionParamsConfig", + "DatasetSplitMetadataConfig", + "DomainRandomizationConfig", + "LightingVisualParamsConfig", + "MaterialPropertiesConfig", + "NeedleGeometryMaterialConfig", + "PilotMetadataConfig", + "ProvenanceMetadataConfig", + "SafetyBoundariesConfig", + "ScenarioConfig", + "SceneConfig", + "SolverParamsConfig", + "SuccessCriteriaConfig", + "TissueGeometryConfig", + "ToolControlParamsConfig", +] diff --git a/src/medsim/data/artifacts.py b/src/medsim/data/artifacts.py new file mode 100644 index 0000000..9face76 --- /dev/null +++ b/src/medsim/data/artifacts.py @@ -0,0 +1,281 @@ +"""Run-level artifact bundle generation.""" + +from __future__ import annotations + +import hashlib +import platform +import sys +from datetime import UTC, datetime +from importlib import metadata +from pathlib import Path +from typing import Any + +from medsim import __version__ +from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.data.claims import artifact_claim_metadata +from medsim.data.exporters import write_json, write_jsonl +from medsim.data.schema 
import EpisodeSummary, StepRecord +from medsim.sim.backend_registry import get_backend_registry +from medsim.utils.paths import best_effort_git_commit + + +def write_run_artifact_bundle( + run_dir: Path, + scene_config: SceneConfig, + scenarios: list[ScenarioConfig], + summaries: list[EpisodeSummary], + metrics: dict[str, Any], +) -> dict[str, Path]: + """Write the Phase 5 run-level artifact bundle.""" + + claims = artifact_claim_metadata(scene_config.backend.name) + records = _load_episode_records(run_dir) + paths: dict[str, Path] = {} + + paths["run_summary"] = write_json( + run_dir / "run_summary.json", + { + "schema_version": "medsim.run_summary.v1", + **claims, + "run_id": run_dir.name, + "pilot_metadata": scene_config.pilot.model_dump(mode="json"), + "scenario_ids": [scenario.scenario_id for scenario in scenarios], + "episode_count": len(summaries), + "created_at_utc": datetime.now(UTC).isoformat(), + "outcomes": [summary.outcome_label for summary in summaries], + "metrics": metrics, + }, + ) + paths["scenario_resolved"] = write_json( + run_dir / "scenario_resolved.json", + { + "schema_version": "medsim.scenario_resolved.v1", + **claims, + "scene": scene_config.model_dump(mode="json"), + "scenarios": [scenario.model_dump(mode="json") for scenario in scenarios], + }, + ) + paths["backend_capabilities"] = write_json( + run_dir / "backend_capabilities.json", + get_backend_registry().capabilities(scene_config.backend.name), + ) + paths["environment"] = write_json(run_dir / "environment.json", _environment_payload(claims)) + paths["provenance"] = write_json( + run_dir / "provenance.json", + { + "schema_version": "medsim.provenance.v1", + **claims, + "run_id": run_dir.name, + "git_commit": best_effort_git_commit(run_dir), + "dirty_git_state": _dirty_git_state(run_dir), + "timestamp_utc": datetime.now(UTC).isoformat(), + "python_version": sys.version.split()[0], + "platform": platform.platform(), + "medsim_version": __version__, + }, + ) + paths["metrics"] = 
write_json(run_dir / "metrics.json", metrics) + paths["validation"] = write_json( + run_dir / "validation.json", + { + "schema_version": "medsim.validation_status.v1", + **claims, + "status": "not_validated", + "replay_validation": "pending", + "phantom_validation": "not_performed", + "clinical_validation": "not_performed", + }, + ) + replay_path = run_dir / "replay_validation.json" + if not replay_path.exists(): + paths["replay_validation"] = write_json( + replay_path, + { + "schema_version": "medsim.replay_validation_aggregate.v1", + **claims, + "aggregate": { + "validation_count": 0, + "pass_count": 0, + "fail_count": 0, + "pass_rate": None, + "mismatch_reason_counts": {}, + "mismatch_rate_by_scenario": {}, + }, + "results": [], + }, + ) + + paths.update(_write_streams(run_dir, records, claims)) + checksums = _checksums_for_run(run_dir) + paths["checksums"] = write_json( + run_dir / "checksums.json", + { + "schema_version": "medsim.checksums.v1", + **claims, + "algorithm": "sha256", + "files": checksums, + }, + ) + manifest_entries = _manifest_entries(run_dir, checksums) + paths["artifact_manifest"] = write_json( + run_dir / "artifact_manifest.json", + { + "schema_version": "medsim.artifact_manifest.v1", + **claims, + "run_id": run_dir.name, + "artifact_count": len(manifest_entries), + "artifacts": manifest_entries, + }, + ) + return paths + + +def _load_episode_records(run_dir: Path) -> list[StepRecord]: + records: list[StepRecord] = [] + for path in sorted((run_dir / "episodes").glob("episode_*.jsonl")): + with path.open("r", encoding="utf-8") as handle: + for line in handle: + if line.strip(): + records.append(StepRecord.model_validate_json(line)) + return records + + +def _write_streams( + run_dir: Path, + records: list[StepRecord], + claims: dict[str, Any], +) -> dict[str, Path]: + streams: dict[str, list[dict[str, Any]]] = { + "states": [], + "events": [], + "commands": [], + "contacts": [], + "observations": [], + "actions": [], + "rewards": [], + } + for 
record in records: + base = { + **claims, + "episode_id": record.episode_id, + "scenario_id": record.scenario_id, + "seed": record.seed, + "step_index": record.step_index, + "timestamp_utc": record.timestamp_utc.isoformat(), + } + streams["states"].append( + { + "schema_version": "medsim.state_stream.v1", + **base, + "tool_poses": record.tool_poses, + "needle_pose": record.needle_pose, + "target_info": record.target_info, + "camera_condition": record.camera_condition, + "outcome_label": record.outcome_label, + "failure_reason": record.failure_reason, + } + ) + streams["observations"].append( + { + "schema_version": "medsim.observation_stream.v1", + **base, + "tool_poses": record.tool_poses, + "needle_pose": record.needle_pose, + "camera_condition": record.camera_condition, + } + ) + action_payload = { + "schema_version": "medsim.action_stream.v1", + **base, + "action": record.action, + } + streams["actions"].append(action_payload) + streams["commands"].append({**action_payload, "schema_version": "medsim.command_stream.v1"}) + streams["rewards"].append( + { + "schema_version": "medsim.reward_stream.v1", + **base, + "reward": 1.0 if record.outcome_label == "success" else 0.0, + "source": "placeholder_terminal_success_indicator", + } + ) + for event in record.events: + streams["events"].append( + { + "schema_version": "medsim.event_stream.v1", + **base, + **event, + } + ) + for contact in record.contacts: + streams["contacts"].append( + { + "schema_version": "medsim.contact_stream.v1", + **base, + **contact, + } + ) + + return { + name: write_jsonl(run_dir / f"{name}.jsonl", rows) + for name, rows in streams.items() + } + + +def _environment_payload(claims: dict[str, Any]) -> dict[str, Any]: + dependencies = {} + for package in ("pydantic", "PyYAML", "fastapi", "uvicorn", "pytest"): + try: + dependencies[package] = metadata.version(package) + except metadata.PackageNotFoundError: + continue + return { + "schema_version": "medsim.environment.v1", + **claims, + 
"medsim_version": __version__, + "python_version": sys.version.split()[0], + "platform": platform.platform(), + "dependency_versions": dependencies, + } + + +def _dirty_git_state(run_dir: Path) -> bool | None: + import subprocess + + try: + result = subprocess.run( + ["git", "-C", str(run_dir), "status", "--short"], + check=False, + capture_output=True, + text=True, + timeout=2, + ) + except (OSError, subprocess.SubprocessError): + return None + if result.returncode != 0: + return None + return bool(result.stdout.strip()) + + +def _checksums_for_run(run_dir: Path) -> dict[str, str]: + checksums: dict[str, str] = {} + for path in sorted(run_dir.rglob("*")): + if not path.is_file() or path.name == "checksums.json": + continue + rel = path.relative_to(run_dir).as_posix() + checksums[rel] = hashlib.sha256(path.read_bytes()).hexdigest() + return checksums + + +def _manifest_entries(run_dir: Path, checksums: dict[str, str]) -> list[dict[str, Any]]: + entries = [] + for rel, checksum in sorted(checksums.items()): + path = run_dir / rel + entries.append( + { + "path": rel, + "bytes": path.stat().st_size, + "sha256": checksum, + } + ) + return entries diff --git a/src/medsim/data/claims.py b/src/medsim/data/claims.py new file mode 100644 index 0000000..d002a3d --- /dev/null +++ b/src/medsim/data/claims.py @@ -0,0 +1,46 @@ +"""Artifact claim-boundary metadata helpers.""" + +from __future__ import annotations + +from typing import Any + +from medsim.sim.backend_registry import get_backend_registry + +DISALLOWED_USES = [ + "patient_care", + "clinical_decision_support", + "live_autonomous_surgery", + "regulated_deployment_without_clearance", +] + + +def artifact_claim_metadata(backend_name: str) -> dict[str, Any]: + """Return claim-boundary metadata that should appear in run artifacts.""" + + capabilities = get_backend_registry().capabilities(backend_name) + intended_use = ( + "infrastructure_validation" + if capabilities.name == "placeholder" + else "backend_adapter_preflight" 
+ ) + return { + "backend": capabilities.name, + "backend_version": capabilities.version, + "fidelity_level": str(capabilities.fidelity_level), + "physical_accuracy": bool(capabilities.real_physics), + "intended_use": intended_use, + "disallowed_uses": list(DISALLOWED_USES), + } + + +def default_placeholder_claim_metadata() -> dict[str, Any]: + """Return static placeholder defaults for backward-compatible schema defaults.""" + + return { + "backend": "placeholder", + "backend_version": "0.1.0", + "fidelity_level": "placeholder_deterministic", + "physical_accuracy": False, + "intended_use": "infrastructure_validation", + "disallowed_uses": list(DISALLOWED_USES), + } diff --git a/src/medsim/data/dataset.py b/src/medsim/data/dataset.py new file mode 100644 index 0000000..3b0f315 --- /dev/null +++ b/src/medsim/data/dataset.py @@ -0,0 +1,184 @@ +"""Dataset indexing, export, validation, and summary tools.""" + +from __future__ import annotations + +import csv +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from medsim.data.exporters import write_json, write_jsonl +from medsim.data.schema import StepRecord + + +def dataset_index(runs_root: Path | str) -> dict[str, Any]: + """Index run directories under an artifact root.""" + + root = Path(runs_root) + runs = [] + for run_dir in _run_dirs(root): + manifest_path = run_dir / "run_manifest.json" + summary_path = run_dir / "run_summary.json" + runs.append( + { + "run_id": run_dir.name, + "path": str(run_dir), + "has_manifest": manifest_path.exists(), + "has_run_summary": summary_path.exists(), + "episode_count": len(list((run_dir / "episodes").glob("episode_*_summary.json"))), + } + ) + return { + "schema_version": "medsim.dataset_index.v1", + "runs_root": str(root), + "created_at_utc": datetime.now(UTC).isoformat(), + "run_count": len(runs), + "runs": runs, + } + + +def dataset_summary(runs_root: Path | str) -> dict[str, Any]: + """Summarize available runs and episodes.""" + + index = 
dataset_index(runs_root) + return { + "schema_version": "medsim.dataset_summary.v1", + "runs_root": index["runs_root"], + "run_count": index["run_count"], + "episode_count": sum(run["episode_count"] for run in index["runs"]), + "runs_with_manifest": sum(1 for run in index["runs"] if run["has_manifest"]), + "runs_with_run_summary": sum(1 for run in index["runs"] if run["has_run_summary"]), + } + + +def export_dataset(runs_root: Path | str, out_dir: Path | str) -> dict[str, Any]: + """Export generic JSONL, imitation-learning, RL, CSV, and manifest files.""" + + root = Path(runs_root) + out = Path(out_dir) + out.mkdir(parents=True, exist_ok=True) + records = _load_records(root) + write_jsonl(out / "episodes.jsonl", [record.model_dump(mode="json") for record in records]) + write_jsonl(out / "imitation_learning.jsonl", _imitation_rows(records)) + write_jsonl(out / "rl_transitions.jsonl", _rl_rows(records)) + _write_metadata_csv(out / "metadata.csv", records) + manifest = { + "schema_version": "medsim.dataset_manifest.v1", + "created_at_utc": datetime.now(UTC).isoformat(), + "runs_root": str(root), + "record_count": len(records), + "formats": { + "generic_jsonl": "episodes.jsonl", + "imitation_learning": "imitation_learning.jsonl", + "rl_transitions": "rl_transitions.jsonl", + "metadata_csv": "metadata.csv", + }, + } + write_json(out / "manifest.json", manifest) + return manifest + + +def validate_dataset(dataset_dir: Path | str) -> dict[str, Any]: + """Validate a dataset export directory.""" + + root = Path(dataset_dir) + required = [ + "manifest.json", + "episodes.jsonl", + "imitation_learning.jsonl", + "rl_transitions.jsonl", + "metadata.csv", + ] + missing = [name for name in required if not (root / name).exists()] + empty = [ + name + for name in required + if (root / name).exists() and (root / name).stat().st_size == 0 + ] + return { + "schema_version": "medsim.dataset_validation.v1", + "dataset_dir": str(root), + "valid": not missing and not empty, + 
"missing_files": missing, + "empty_files": empty, + } + + +def _run_dirs(root: Path) -> list[Path]: + if root.name.startswith("run_") and root.is_dir(): + return [root] + return sorted(path for path in root.glob("run_*") if path.is_dir()) + + +def _load_records(root: Path) -> list[StepRecord]: + records = [] + for run_dir in _run_dirs(root): + for path in sorted((run_dir / "episodes").glob("episode_*.jsonl")): + with path.open("r", encoding="utf-8") as handle: + for line in handle: + if line.strip(): + records.append(StepRecord.model_validate_json(line)) + return records + + +def _imitation_rows(records: list[StepRecord]) -> list[dict[str, Any]]: + return [ + { + "schema_version": "medsim.imitation_row.v1", + "episode_id": record.episode_id, + "scenario_id": record.scenario_id, + "step_index": record.step_index, + "observation": { + "tool_poses": record.tool_poses, + "needle_pose": record.needle_pose, + "camera_condition": record.camera_condition, + }, + "action": record.action, + } + for record in records + ] + + +def _rl_rows(records: list[StepRecord]) -> list[dict[str, Any]]: + rows = [] + by_episode: dict[str, list[StepRecord]] = {} + for record in records: + by_episode.setdefault(record.episode_id, []).append(record) + for episode_records in by_episode.values(): + ordered = sorted(episode_records, key=lambda record: record.step_index) + for current, nxt in zip(ordered, ordered[1:], strict=False): + rows.append( + { + "schema_version": "medsim.rl_transition.v1", + "episode_id": current.episode_id, + "scenario_id": current.scenario_id, + "step_index": current.step_index, + "observation": current.tool_poses, + "action": nxt.action, + "reward": 1.0 if nxt.outcome_label == "success" else 0.0, + "next_observation": nxt.tool_poses, + "done": nxt.outcome_label is not None, + } + ) + return rows + + +def _write_metadata_csv(path: Path, records: list[StepRecord]) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8", 
newline="") as handle: + writer = csv.DictWriter( + handle, + fieldnames=["episode_id", "scenario_id", "seed", "step_index", "outcome_label"], + ) + writer.writeheader() + for record in records: + writer.writerow( + { + "episode_id": record.episode_id, + "scenario_id": record.scenario_id, + "seed": record.seed, + "step_index": record.step_index, + "outcome_label": record.outcome_label or "", + } + ) + return path diff --git a/src/medsim/data/recorder.py b/src/medsim/data/recorder.py index ddddfa7..01e1dad 100644 --- a/src/medsim/data/recorder.py +++ b/src/medsim/data/recorder.py @@ -5,14 +5,17 @@ from collections import Counter from datetime import UTC, datetime from pathlib import Path +from typing import Any from medsim.config.models import ScenarioConfig +from medsim.data.claims import artifact_claim_metadata from medsim.data.exporters import write_json, write_jsonl from medsim.data.schema import EpisodeSummary, StepRecord from medsim.scenarios.perturbations import PerturbationEffects from medsim.sim.events import SimEvent from medsim.sim.models import Action, Observation, StepResult from medsim.sim.state import SimulationState +from medsim.sim.taxonomy import EventType class EpisodeRecorder: @@ -26,6 +29,7 @@ def __init__( output_dir: Path, perturbation_effects: PerturbationEffects, config_snapshot: Path | None = None, + artifact_metadata: dict[str, Any] | None = None, ) -> None: self.episode_id = episode_id self.seed = seed @@ -33,6 +37,7 @@ def __init__( self.output_dir = output_dir self.perturbation_effects = perturbation_effects self.config_snapshot = config_snapshot + self.artifact_metadata = artifact_metadata or artifact_claim_metadata("placeholder") self.started_at_utc = datetime.now(UTC) self.records: list[StepRecord] = [] self._event_counts: Counter[str] = Counter() @@ -68,6 +73,7 @@ def finalize(self, final_state: SimulationState) -> EpisodeSummary: "min_camera_quality": self._min_camera_quality, } return EpisodeSummary( + **self.artifact_metadata, 
episode_id=self.episode_id, seed=self.seed, scenario_id=self.scenario.scenario_id, @@ -119,6 +125,7 @@ def _append_record( self._min_camera_quality = min(self._min_camera_quality, observation.camera_quality) self.records.append( StepRecord( + **self.artifact_metadata, episode_id=self.episode_id, seed=self.seed, scenario_id=self.scenario.scenario_id, @@ -129,8 +136,52 @@ def _append_record( needle_pose=observation.needle_pose, target_info=[target.to_dict() for target in state.targets], camera_condition=state.camera.to_dict(), + contacts=self._placeholder_contacts(state, events), + deformation_summary=self._placeholder_deformation_summary(), events=event_dicts, outcome_label=state.outcome, failure_reason=state.failure_reason, ) ) + + def _placeholder_contacts( + self, + state: SimulationState, + events: list[SimEvent], + ) -> list[dict[str, Any]]: + contacts: list[dict[str, Any]] = [] + for event in events: + if event.event_type != EventType.TOOL_COLLISION_DETECTED: + continue + contacts.append( + { + "schema_version": "medsim.placeholder_contact.v1", + "source": "placeholder_synthetic", + "physical_accuracy": False, + "step_index": state.step_index, + "event_type": str(event.event_type), + "force_n": None, + "contact_duration_s": None, + "metadata": dict(event.metadata), + "limitations": [ + "Synthetic collision marker from placeholder state machine.", + "No physical force, contact patch, or tissue mechanics are modeled.", + ], + } + ) + return contacts + + def _placeholder_deformation_summary(self) -> dict[str, Any]: + return { + "schema_version": "medsim.placeholder_deformation_summary.v1", + "source": "placeholder_synthetic", + "physical_accuracy": False, + "max_displacement_mm": 0.0, + "mean_displacement_mm": 0.0, + "node_count": 0, + "target_shift_mm": list(self.perturbation_effects.target_shift_mm), + "limitations": [ + "Placeholder backend does not simulate deformable tissue.", + "Values are infrastructure-validation channels only.", + ], + } diff --git 
a/src/medsim/data/schema.py b/src/medsim/data/schema.py index 4795b84..8e0aa94 100644 --- a/src/medsim/data/schema.py +++ b/src/medsim/data/schema.py @@ -7,6 +7,12 @@ from pydantic import BaseModel, ConfigDict, Field +from medsim.data.claims import default_placeholder_claim_metadata + + +def _default_claims() -> dict[str, Any]: + return default_placeholder_claim_metadata() + class StrictRecord(BaseModel): """Base record that rejects unexpected fields.""" @@ -18,6 +24,12 @@ class StepRecord(StrictRecord): """One step of an episode trace.""" schema_version: str = "medsim.step_record.v1" + backend: str = Field(default_factory=lambda: _default_claims()["backend"]) + backend_version: str = Field(default_factory=lambda: _default_claims()["backend_version"]) + fidelity_level: str = Field(default_factory=lambda: _default_claims()["fidelity_level"]) + physical_accuracy: bool = Field(default_factory=lambda: _default_claims()["physical_accuracy"]) + intended_use: str = Field(default_factory=lambda: _default_claims()["intended_use"]) + disallowed_uses: list[str] = Field(default_factory=lambda: _default_claims()["disallowed_uses"]) episode_id: str seed: int scenario_id: str @@ -28,6 +40,8 @@ class StepRecord(StrictRecord): needle_pose: list[float] target_info: list[dict[str, Any]] camera_condition: dict[str, Any] + contacts: list[dict[str, Any]] = Field(default_factory=list) + deformation_summary: dict[str, Any] = Field(default_factory=dict) events: list[dict[str, Any]] = Field(default_factory=list) outcome_label: str | None = None failure_reason: str | None = None @@ -37,6 +51,12 @@ class EpisodeSummary(StrictRecord): """Summary record for one episode.""" schema_version: str = "medsim.episode_summary.v1" + backend: str = Field(default_factory=lambda: _default_claims()["backend"]) + backend_version: str = Field(default_factory=lambda: _default_claims()["backend_version"]) + fidelity_level: str = Field(default_factory=lambda: _default_claims()["fidelity_level"]) + 
physical_accuracy: bool = Field(default_factory=lambda: _default_claims()["physical_accuracy"]) + intended_use: str = Field(default_factory=lambda: _default_claims()["intended_use"]) + disallowed_uses: list[str] = Field(default_factory=lambda: _default_claims()["disallowed_uses"]) episode_id: str seed: int scenario_id: str @@ -59,6 +79,18 @@ class RunManifest(StrictRecord): schema_version: str = "medsim.run_manifest.v1" artifact_layout_version: str = "medsim.artifacts.v1" + backend: str = Field(default_factory=lambda: _default_claims()["backend"]) + backend_version: str = Field(default_factory=lambda: _default_claims()["backend_version"]) + fidelity_level: str = Field(default_factory=lambda: _default_claims()["fidelity_level"]) + physical_accuracy: bool = Field(default_factory=lambda: _default_claims()["physical_accuracy"]) + intended_use: str = Field(default_factory=lambda: _default_claims()["intended_use"]) + disallowed_uses: list[str] = Field(default_factory=lambda: _default_claims()["disallowed_uses"]) + use_mode: str = "research" + risk_acknowledged: bool = False + operator_id: str | None = None + institution: str | None = None + reviewer: str | None = None + approved_protocol_id: str | None = None run_id: str created_at_utc: datetime medsim_version: str @@ -80,6 +112,12 @@ class ReplayValidationResult(StrictRecord): """Structured result for deterministic replay validation.""" schema_version: str = "medsim.replay_validation.v1" + backend: str = Field(default_factory=lambda: _default_claims()["backend"]) + backend_version: str = Field(default_factory=lambda: _default_claims()["backend_version"]) + fidelity_level: str = Field(default_factory=lambda: _default_claims()["fidelity_level"]) + physical_accuracy: bool = Field(default_factory=lambda: _default_claims()["physical_accuracy"]) + intended_use: str = Field(default_factory=lambda: _default_claims()["intended_use"]) + disallowed_uses: list[str] = Field(default_factory=lambda: 
_default_claims()["disallowed_uses"]) episode_id: str scenario_id: str seed: int diff --git a/src/medsim/eval/metrics.py b/src/medsim/eval/metrics.py index 2b42ada..b1aca10 100644 --- a/src/medsim/eval/metrics.py +++ b/src/medsim/eval/metrics.py @@ -8,6 +8,7 @@ from statistics import mean from typing import Any +from medsim.data.claims import default_placeholder_claim_metadata from medsim.data.schema import EpisodeSummary, ReplayValidationResult @@ -45,6 +46,7 @@ def aggregate_summaries(summaries: list[EpisodeSummary]) -> dict[str, Any]: if total == 0: return { "schema_version": "medsim.aggregate_metrics.v1", + **default_placeholder_claim_metadata(), "episode_count": 0, "success_count": 0, "failure_count": 0, @@ -92,6 +94,7 @@ def aggregate_summaries(summaries: list[EpisodeSummary]) -> dict[str, Any]: return { "schema_version": "medsim.aggregate_metrics.v1", + **_claims_from_summary(summaries[0]), "episode_count": total, "success_count": success_count, "failure_count": failure_count, @@ -167,3 +170,14 @@ def aggregate_replay_validations( def _counter_map_to_dict(counter_map: dict[str, Counter[str]]) -> dict[str, dict[str, int]]: return {key: dict(sorted(counter.items())) for key, counter in sorted(counter_map.items())} + + +def _claims_from_summary(summary: EpisodeSummary) -> dict[str, Any]: + return { + "backend": summary.backend, + "backend_version": summary.backend_version, + "fidelity_level": summary.fidelity_level, + "physical_accuracy": summary.physical_accuracy, + "intended_use": summary.intended_use, + "disallowed_uses": list(summary.disallowed_uses), + } diff --git a/src/medsim/eval/replay.py b/src/medsim/eval/replay.py index bf482f2..1fba712 100644 --- a/src/medsim/eval/replay.py +++ b/src/medsim/eval/replay.py @@ -76,6 +76,12 @@ def validate_replay( } mismatch_reasons = [name for name, passed in comparisons.items() if not passed] result = ReplayValidationResult( + backend=summary.backend, + backend_version=summary.backend_version, + 
fidelity_level=summary.fidelity_level, + physical_accuracy=summary.physical_accuracy, + intended_use=summary.intended_use, + disallowed_uses=list(summary.disallowed_uses), episode_id=summary.episode_id, scenario_id=summary.scenario_id, seed=summary.seed, diff --git a/src/medsim/eval/runner.py b/src/medsim/eval/runner.py new file mode 100644 index 0000000..f876c68 --- /dev/null +++ b/src/medsim/eval/runner.py @@ -0,0 +1,157 @@ +"""Evaluation report generation and run comparison.""" + +from __future__ import annotations + +import csv +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +from medsim.data.exporters import write_json +from medsim.eval.metrics import aggregate_run, load_episode_summaries + + +def run_eval_report(runs_root: Path | str, out_dir: Path | str) -> dict[str, Any]: + """Evaluate one run or a directory of runs and write report artifacts.""" + + runs = _run_dirs(Path(runs_root)) + out = Path(out_dir) + out.mkdir(parents=True, exist_ok=True) + run_rows = [] + failures = [] + lengths = [] + success_rates = [] + for run_dir in runs: + aggregate = aggregate_run(run_dir) + run_rows.append( + { + "run_id": run_dir.name, + "path": str(run_dir), + "episode_count": aggregate["episode_count"], + "success_rate": aggregate["success_rate"], + "failure_count": aggregate["failure_count"], + "replay_pass_rate": aggregate["replay_validation"]["pass_rate"], + } + ) + success_rates.append(aggregate["success_rate"]) + if aggregate["episode_length"]["mean"] is not None: + lengths.append(aggregate["episode_length"]["mean"]) + for summary in load_episode_summaries(run_dir): + if summary.outcome_label != "success": + failures.append( + { + "run_id": run_dir.name, + "episode_id": summary.episode_id, + "scenario_id": summary.scenario_id, + "failure_reason": summary.failure_reason, + } + ) + + summary_payload = { + "schema_version": "medsim.eval_summary.v1", + "eval_id": Path(out).name, + "created_at_utc": datetime.now(UTC).isoformat(), + 
"run_count": len(runs), + "episode_count": sum(int(row["episode_count"]) for row in run_rows), + "mean_success_rate": _mean(success_rates), + "failure_count": len(failures), + } + distributions = { + "schema_version": "medsim.metric_distributions.v1", + "success_rates": success_rates, + "episode_length_mean_by_run": lengths, + } + write_json(out / "eval_summary.json", summary_payload) + write_json( + out / "failures.json", + {"schema_version": "medsim.eval_failures.v1", "failures": failures}, + ) + write_json(out / "metric_distributions.json", distributions) + _write_runs_csv(out / "runs.csv", run_rows) + _write_report(out / "eval_report.md", summary_payload, run_rows, failures) + return summary_payload + + +def compare_runs(run_a: Path | str, run_b: Path | str) -> dict[str, Any]: + """Compare aggregate metrics for two runs.""" + + left = Path(run_a) + right = Path(run_b) + aggregate_a = aggregate_run(left) + aggregate_b = aggregate_run(right) + return { + "schema_version": "medsim.run_comparison.v1", + "run_a": left.name, + "run_b": right.name, + "success_rate_delta": aggregate_b["success_rate"] - aggregate_a["success_rate"], + "failure_count_delta": aggregate_b["failure_count"] - aggregate_a["failure_count"], + "episode_count_delta": aggregate_b["episode_count"] - aggregate_a["episode_count"], + "run_a_metrics": aggregate_a, + "run_b_metrics": aggregate_b, + } + + +def _run_dirs(root: Path) -> list[Path]: + if root.name.startswith("run_") and root.is_dir(): + return [root] + return sorted(path for path in root.glob("run_*") if path.is_dir()) + + +def _mean(values: list[float]) -> float | None: + if not values: + return None + return sum(values) / len(values) + + +def _write_runs_csv(path: Path, rows: list[dict[str, Any]]) -> Path: + path.parent.mkdir(parents=True, exist_ok=True) + with path.open("w", encoding="utf-8", newline="") as handle: + fieldnames = [ + "run_id", + "path", + "episode_count", + "success_rate", + "failure_count", + "replay_pass_rate", + ] + 
writer = csv.DictWriter(handle, fieldnames=fieldnames) + writer.writeheader() + for row in rows: + writer.writerow(row) + return path + + +def _write_report( + path: Path, + summary: dict[str, Any], + run_rows: list[dict[str, Any]], + failures: list[dict[str, Any]], +) -> Path: + lines = [ + "# MedSim Evaluation Report", + "", + f"- Eval ID: `{summary['eval_id']}`", + f"- Runs: {summary['run_count']}", + f"- Episodes: {summary['episode_count']}", + f"- Mean success rate: {summary['mean_success_rate']}", + f"- Failures: {summary['failure_count']}", + "", + "## Runs", + "", + ] + for row in run_rows: + lines.append( + f"- `{row['run_id']}`: episodes={row['episode_count']} " + f"success_rate={row['success_rate']} failures={row['failure_count']}" + ) + lines.extend(["", "## Failures", ""]) + if failures: + for failure in failures: + lines.append( + f"- `{failure['run_id']}/{failure['episode_id']}` " + f"{failure['scenario_id']}: {failure['failure_reason']}" + ) + else: + lines.append("- None") + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + return path diff --git a/src/medsim/qms/__init__.py b/src/medsim/qms/__init__.py new file mode 100644 index 0000000..caa3fe3 --- /dev/null +++ b/src/medsim/qms/__init__.py @@ -0,0 +1,5 @@ +"""QMS readiness helpers.""" + +from medsim.qms.runner import export_qms, trace_qms, verify_qms + +__all__ = ["export_qms", "trace_qms", "verify_qms"] diff --git a/src/medsim/qms/runner.py b/src/medsim/qms/runner.py new file mode 100644 index 0000000..e58f7d5 --- /dev/null +++ b/src/medsim/qms/runner.py @@ -0,0 +1,177 @@ +"""QMS traceability and verification utilities.""" + +from __future__ import annotations + +import shutil +from datetime import UTC, datetime +from pathlib import Path +from typing import Any + +import yaml + +from medsim.data.exporters import write_json + +QMS_ROOT = Path("docs/qms") +REQUIREMENTS_PATH = QMS_ROOT / "REQUIREMENTS.yaml" + + +def load_qms(path: Path | str = REQUIREMENTS_PATH) -> dict[str, Any]: + 
"""Load QMS requirements/risk YAML.""" + + with Path(path).open("r", encoding="utf-8") as handle: + payload = yaml.safe_load(handle) + if not isinstance(payload, dict): + raise ValueError(f"Expected QMS mapping in {path}") + payload.setdefault("requirements", []) + payload.setdefault("risks", []) + return payload + + +def trace_qms( + *, + requirements_path: Path | str = REQUIREMENTS_PATH, + out_path: Path | str | None = QMS_ROOT / "TRACEABILITY_MATRIX.md", +) -> dict[str, Any]: + """Generate a traceability payload and optional markdown report.""" + + qms = load_qms(requirements_path) + requirements = qms["requirements"] + trace_rows = [ + { + "id": requirement["id"], + "title": requirement["title"], + "status": requirement.get("status", "unknown"), + "implementation_files": requirement.get("implementation_files", []), + "tests": requirement.get("tests", []), + "verification_method": requirement.get("verification_method"), + "release_blocking": bool(requirement.get("release_blocking", False)), + } + for requirement in requirements + ] + payload = { + "schema_version": "medsim.qms_traceability.v1", + "generated_at_utc": datetime.now(UTC).isoformat(), + "requirement_count": len(trace_rows), + "trace_rows": trace_rows, + } + if out_path is not None: + _write_traceability_markdown(Path(out_path), payload) + return payload + + +def verify_qms(requirements_path: Path | str = REQUIREMENTS_PATH) -> dict[str, Any]: + """Verify release-blocking QMS requirements and critical risk mitigations.""" + + qms = load_qms(requirements_path) + findings = [] + for requirement in qms["requirements"]: + if not requirement.get("release_blocking", False): + continue + if not requirement.get("verification_method"): + findings.append( + { + "severity": "error", + "code": "missing_verification_method", + "id": requirement.get("id"), + "message": "Release-blocking requirement lacks verification method.", + } + ) + if not requirement.get("tests"): + findings.append( + { + "severity": "error", + 
"code": "missing_tests", + "id": requirement.get("id"), + "message": "Release-blocking requirement lacks tests.", + } + ) + for risk in qms["risks"]: + if risk.get("severity") != "critical": + continue + if not risk.get("mitigation"): + findings.append( + { + "severity": "error", + "code": "critical_risk_missing_mitigation", + "id": risk.get("id"), + "message": "Critical risk lacks mitigation.", + } + ) + claim_findings = _scan_false_certification_claims(QMS_ROOT) + findings.extend(claim_findings) + payload = { + "schema_version": "medsim.qms_verification.v1", + "verified_at_utc": datetime.now(UTC).isoformat(), + "passed": not any(finding["severity"] == "error" for finding in findings), + "finding_count": len(findings), + "findings": findings, + } + write_json(QMS_ROOT / "qms_verification_report.json", payload) + return payload + + +def export_qms(out_dir: Path | str, requirements_path: Path | str = REQUIREMENTS_PATH) -> dict[str, Any]: + """Export QMS docs plus trace/verify payloads.""" + + out = Path(out_dir) + out.mkdir(parents=True, exist_ok=True) + copied = [] + for path in sorted(QMS_ROOT.glob("*")): + if path.is_file(): + shutil.copy2(path, out / path.name) + copied.append(path.name) + trace = trace_qms(requirements_path=requirements_path, out_path=out / "TRACEABILITY_MATRIX.md") + verify = verify_qms(requirements_path=requirements_path) + write_json(out / "qms_traceability.json", trace) + write_json(out / "qms_verification_report.json", verify) + payload = { + "schema_version": "medsim.qms_export.v1", + "exported_at_utc": datetime.now(UTC).isoformat(), + "out_dir": str(out), + "copied_files": copied, + "verification_passed": verify["passed"], + } + write_json(out / "qms_export_manifest.json", payload) + return payload + + +def _write_traceability_markdown(path: Path, payload: dict[str, Any]) -> Path: + lines = [ + "# Traceability Matrix", + "", + f"Generated: {payload['generated_at_utc']}", + "", + "| Requirement | Status | Verification | Implementation | 
Tests |", + "| --- | --- | --- | --- | --- |", + ] + for row in payload["trace_rows"]: + lines.append( + "| {id} - {title} | {status} | {verification_method} | {impl} | {tests} |".format( + id=row["id"], + title=row["title"], + status=row["status"], + verification_method=row["verification_method"], + impl=", ".join(row["implementation_files"]), + tests=", ".join(row["tests"]), + ) + ) + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + return path + + +def _scan_false_certification_claims(root: Path) -> list[dict[str, Any]]: + blocked_phrases = ["FDA-cleared", "clinically validated", "patient-care ready"] + findings = [] + for path in sorted(root.glob("*.md")): + text = path.read_text(encoding="utf-8") + for phrase in blocked_phrases: + if phrase in text and "Disallowed" not in text and "No clinical validation" not in text: + findings.append( + { + "severity": "error", + "code": "blocked_claim_phrase", + "id": str(path), + "message": f"Potential unsupported claim phrase found: {phrase}", + } + ) + return findings diff --git a/src/medsim/robotics/__init__.py b/src/medsim/robotics/__init__.py new file mode 100644 index 0000000..88f854b --- /dev/null +++ b/src/medsim/robotics/__init__.py @@ -0,0 +1,7 @@ +"""Dry-run-only robot bench integration interfaces.""" + +from medsim.robotics.dry_run import DryRunRobotAdapter +from medsim.robotics.interface import RobotCommand, RobotCommandPreview +from medsim.robotics.safety import RobotSafetyLimits + +__all__ = ["DryRunRobotAdapter", "RobotCommand", "RobotCommandPreview", "RobotSafetyLimits"] diff --git a/src/medsim/robotics/calibration.py b/src/medsim/robotics/calibration.py new file mode 100644 index 0000000..213293d --- /dev/null +++ b/src/medsim/robotics/calibration.py @@ -0,0 +1,19 @@ +"""Calibration metadata for future bench adapters.""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class CalibrationRequirement(BaseModel): + """Required calibration evidence 
before hardware adapters can be enabled.""" + + model_config = ConfigDict(extra="forbid") + + schema_version: str = "medsim.robot_calibration_requirement.v1" + workspace_registration_required: bool = True + tool_tip_calibration_required: bool = True + camera_tracker_calibration_required: bool = True + emergency_stop_test_required: bool = True + approved_protocol_id: str | None = None + evidence_paths: list[str] = Field(default_factory=list) diff --git a/src/medsim/robotics/dry_run.py b/src/medsim/robotics/dry_run.py new file mode 100644 index 0000000..47eb0ae --- /dev/null +++ b/src/medsim/robotics/dry_run.py @@ -0,0 +1,27 @@ +"""Dry-run robot adapter.""" + +from __future__ import annotations + +from medsim.robotics.interface import RobotCommand, RobotCommandPreview +from medsim.robotics.safety import RobotSafetyLimits + + +class DryRunRobotAdapter: + """Preview robot commands without hardware actuation.""" + + def __init__(self, limits: RobotSafetyLimits | None = None) -> None: + self.limits = limits or RobotSafetyLimits() + + def preview(self, command: RobotCommand) -> RobotCommandPreview: + messages = self.limits.validate_command(command) + accepted = not any( + message + for message in messages + if "exceeds" in message or "NO PATIENT USE" in message + ) + return RobotCommandPreview( + command=command, + accepted=accepted, + would_actuate=False, + messages=messages, + ) diff --git a/src/medsim/robotics/interface.py b/src/medsim/robotics/interface.py new file mode 100644 index 0000000..465052d --- /dev/null +++ b/src/medsim/robotics/interface.py @@ -0,0 +1,36 @@ +"""Robot bench command interfaces.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Literal + +from pydantic import BaseModel, ConfigDict, Field + + +class RobotModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + +class RobotCommand(RobotModel): + """Dry-run command payload for future bench adapters.""" + + schema_version: str = 
"medsim.robot_command.v1" + tool_id: str + delta_mm: list[float] = Field(min_length=3, max_length=3) + delta_rotation_deg: list[float] = Field(default_factory=lambda: [0.0, 0.0, 0.0]) + gripper: Literal["open", "close", "hold"] = "hold" + emergency_stop: bool = False + no_patient_use: bool = True + + +class RobotCommandPreview(RobotModel): + """Result of dry-run command preview.""" + + schema_version: str = "medsim.robot_command_preview.v1" + created_at_utc: datetime = Field(default_factory=lambda: datetime.now(UTC)) + command: RobotCommand + accepted: bool + would_actuate: bool = False + no_patient_use: bool = True + messages: list[str] = Field(default_factory=list) diff --git a/src/medsim/robotics/safety.py b/src/medsim/robotics/safety.py new file mode 100644 index 0000000..641185b --- /dev/null +++ b/src/medsim/robotics/safety.py @@ -0,0 +1,34 @@ +"""Robot bench safety limits.""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + +from medsim.robotics.interface import RobotCommand + + +class RobotSafetyLimits(BaseModel): + """Hard limits for dry-run robot command previews.""" + + model_config = ConfigDict(extra="forbid") + + max_delta_mm: float = Field(default=2.0, gt=0) + max_rotation_deg: float = Field(default=5.0, gt=0) + hardware_enabled: bool = False + emergency_stop_required: bool = True + calibration_required: bool = True + no_patient_use: bool = True + + def validate_command(self, command: RobotCommand) -> list[str]: + messages = [] + if not command.no_patient_use: + messages.append("Command must be explicitly marked NO PATIENT USE.") + if any(abs(value) > self.max_delta_mm for value in command.delta_mm): + messages.append("Command exceeds max delta limit.") + if any(abs(value) > self.max_rotation_deg for value in command.delta_rotation_deg): + messages.append("Command exceeds max rotation limit.") + if command.emergency_stop: + messages.append("Emergency stop command; no actuation allowed.") + if not 
self.hardware_enabled: + messages.append("Hardware adapters are disabled; dry-run preview only.") + return messages diff --git a/src/medsim/sim/__init__.py b/src/medsim/sim/__init__.py index 9c3bc1b..89b1451 100644 --- a/src/medsim/sim/__init__.py +++ b/src/medsim/sim/__init__.py @@ -1,19 +1,47 @@ """Simulation contracts and runtime implementations.""" from medsim.sim.backend import BackendCapability, BackendInfo, CapabilityStatus, SimulationBackend +from medsim.sim.backend_registry import BackendRegistry, create_backend, get_backend_registry +from medsim.sim.base import BackendProtocol +from medsim.sim.capabilities import ( + BackendCapabilities, + BackendHealth, + BackendHealthStatus, + FidelityLevel, +) from medsim.sim.env import SurgicalEnv +from medsim.sim.errors import ( + BackendError, + BackendExecutionError, + BackendNotFoundError, + BackendScenarioValidationError, + BackendUnavailableError, +) from medsim.sim.models import Action, Observation, StepResult from medsim.sim.taxonomy import EventType, FailureReason __all__ = [ "Action", + "BackendCapabilities", "BackendCapability", + "BackendError", + "BackendExecutionError", + "BackendHealth", + "BackendHealthStatus", "BackendInfo", + "BackendNotFoundError", + "BackendProtocol", + "BackendRegistry", + "BackendScenarioValidationError", + "BackendUnavailableError", "CapabilityStatus", "EventType", + "FidelityLevel", "FailureReason", "Observation", "SimulationBackend", "StepResult", "SurgicalEnv", + "create_backend", + "get_backend_registry", ] diff --git a/src/medsim/sim/backend.py b/src/medsim/sim/backend.py index d9b03a3..453d091 100644 --- a/src/medsim/sim/backend.py +++ b/src/medsim/sim/backend.py @@ -8,6 +8,12 @@ from pydantic import BaseModel, ConfigDict, Field from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.sim.capabilities import ( + BackendCapabilities, + BackendHealth, + generic_capabilities_from_info, + health_from_capabilities, +) from medsim.sim.events import SimEvent from 
medsim.sim.models import Action, Observation, StepResult from medsim.sim.state import SimulationState @@ -74,6 +80,34 @@ class SimulationBackend(ABC): def backend_info(self) -> BackendInfo: """Return backend metadata and capability declarations.""" + def capabilities(self) -> BackendCapabilities: + """Return production-facing backend capabilities.""" + + return generic_capabilities_from_info(self.backend_info()) + + def health(self) -> BackendHealth: + """Return backend health status.""" + + return health_from_capabilities(self.capabilities()) + + def validate_scenario(self, scenario: ScenarioConfig) -> list[str]: + """Return validation issues for a scenario before execution.""" + + del scenario + return [] + + def replay_metadata(self) -> dict[str, object]: + """Return replay compatibility metadata for artifacts and clients.""" + + info = self.backend_info() + return { + "backend": info.name, + "backend_version": info.version, + "deterministic_reset_supported": info.deterministic_reset_supported, + "deterministic_action_replay_supported": info.deterministic_action_replay_supported, + "limitations": list(info.replay_limitations), + } + @abstractmethod def initialize(self, scene_config: SceneConfig) -> None: """Initialize backend resources from a scene config.""" diff --git a/src/medsim/sim/backend_registry.py b/src/medsim/sim/backend_registry.py new file mode 100644 index 0000000..baa759a --- /dev/null +++ b/src/medsim/sim/backend_registry.py @@ -0,0 +1,97 @@ +"""Backend registry and health/capability lookup helpers.""" + +from __future__ import annotations + +from collections.abc import Callable +from dataclasses import dataclass + +from medsim.sim.backend import SimulationBackend +from medsim.sim.capabilities import BackendCapabilities, BackendHealth +from medsim.sim.errors import BackendNotFoundError +from medsim.sim.placeholder_backend import PlaceholderBackend +from medsim.sim.sofa_backend import SofaBackend + +BackendFactory = Callable[[], SimulationBackend] 
+ + +@dataclass(frozen=True) +class BackendDescriptor: + """Registered backend factory metadata.""" + + name: str + factory: BackendFactory + + +class BackendRegistry: + """Small in-process registry for available MedSim backend adapters.""" + + def __init__(self) -> None: + self._factories: dict[str, BackendFactory] = {} + + def register(self, name: str, factory: BackendFactory) -> None: + """Register or replace a backend factory.""" + + normalized = name.strip().lower() + if not normalized: + raise ValueError("Backend name must be non-empty.") + self._factories[normalized] = factory + + def names(self) -> list[str]: + """Return registered backend names.""" + + return sorted(self._factories) + + def descriptors(self) -> list[BackendDescriptor]: + """Return registered backend descriptors.""" + + return [ + BackendDescriptor(name=name, factory=self._factories[name]) + for name in self.names() + ] + + def create(self, name: str) -> SimulationBackend: + """Instantiate a registered backend by name.""" + + normalized = name.strip().lower() + try: + factory = self._factories[normalized] + except KeyError as exc: + raise BackendNotFoundError( + f"Unknown backend '{name}'. Registered backends: {', '.join(self.names())}." 
+ ) from exc + return factory() + + def capabilities(self, name: str) -> BackendCapabilities: + """Return capabilities for a registered backend.""" + + return self.create(name).capabilities() + + def health(self, name: str) -> BackendHealth: + """Return health for a registered backend.""" + + return self.create(name).health() + + def list_capabilities(self) -> list[BackendCapabilities]: + """Return capabilities for every registered backend.""" + + return [self.capabilities(name) for name in self.names()] + + def list_health(self) -> list[BackendHealth]: + """Return health for every registered backend.""" + + return [self.health(name) for name in self.names()] + + +def get_backend_registry() -> BackendRegistry: + """Return the default registry containing built-in backends.""" + + registry = BackendRegistry() + registry.register("placeholder", PlaceholderBackend) + registry.register("sofa", SofaBackend) + return registry + + +def create_backend(name: str) -> SimulationBackend: + """Instantiate a built-in backend by name.""" + + return get_backend_registry().create(name) diff --git a/src/medsim/sim/base.py b/src/medsim/sim/base.py new file mode 100644 index 0000000..258d8b1 --- /dev/null +++ b/src/medsim/sim/base.py @@ -0,0 +1,56 @@ +"""Backend protocol surface for production integrations.""" + +from __future__ import annotations + +from typing import Protocol, runtime_checkable + +from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.sim.backend import BackendInfo, SimulationBackend +from medsim.sim.capabilities import BackendCapabilities, BackendHealth +from medsim.sim.events import SimEvent +from medsim.sim.models import Action, Observation, StepResult +from medsim.sim.state import SimulationState + + +@runtime_checkable +class BackendProtocol(Protocol): + """Protocol implemented by MedSim simulation backends.""" + + def backend_info(self) -> BackendInfo: + """Return legacy/backend-contract metadata.""" + + def capabilities(self) -> 
BackendCapabilities: + """Return production-facing capability declarations.""" + + def health(self) -> BackendHealth: + """Return dependency and runtime health.""" + + def validate_scenario(self, scenario: ScenarioConfig) -> list[str]: + """Return scenario validation issues for this backend.""" + + def replay_metadata(self) -> dict[str, object]: + """Return replay compatibility metadata.""" + + def initialize(self, scene_config: SceneConfig) -> None: + """Initialize backend resources from a scene config.""" + + def reset(self, seed: int, scenario: ScenarioConfig) -> Observation: + """Reset the backend for an episode.""" + + def step(self, action: Action) -> StepResult: + """Advance the backend by one action.""" + + def get_observation(self) -> Observation: + """Return the latest observation.""" + + def get_state(self) -> SimulationState: + """Return the latest state.""" + + def get_events(self) -> list[SimEvent]: + """Return events emitted during the latest lifecycle call.""" + + def close(self) -> None: + """Release backend resources.""" + + +__all__ = ["BackendProtocol", "SimulationBackend"] diff --git a/src/medsim/sim/capabilities.py b/src/medsim/sim/capabilities.py new file mode 100644 index 0000000..7cab376 --- /dev/null +++ b/src/medsim/sim/capabilities.py @@ -0,0 +1,263 @@ +"""Production-facing backend capability and health models.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from enum import StrEnum +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class FidelityLevel(StrEnum): + """Supported simulator fidelity declarations. + + These values are claim boundaries. Higher levels require real evidence and + must not be selected by default. 
+ """ + + PLACEHOLDER_DETERMINISTIC = "placeholder_deterministic" + SOFA_UNAVAILABLE_ADAPTER_ONLY = "sofa_unavailable_adapter_only" + SOFA_MINIMAL_UNVALIDATED = "sofa_minimal_unvalidated" + SOFA_FEM_EXPERIMENTAL = "sofa_fem_experimental" + PHANTOM_VALIDATED = "phantom_validated" + CLINICALLY_VALIDATED = "clinically_validated" + + +class BackendHealthStatus(StrEnum): + """Backend health states exposed to API/CLI clients.""" + + AVAILABLE = "available" + UNAVAILABLE = "unavailable" + DEGRADED = "degraded" + + +class BackendCapabilities(BaseModel): + """Machine-readable backend capability declaration.""" + + model_config = ConfigDict(extra="forbid") + + name: str + version: str + available: bool + real_physics: bool + deterministic: bool + supports_contacts: bool + supports_tissue_deformation: bool + supports_tool_control: bool + supports_needle: bool + supports_replay: bool + supports_domain_randomization: bool + supports_policy_rollout: bool + supports_rl_env: bool + supports_gpu: bool | None = None + supports_fem: bool + supports_collision_forces: bool + supports_material_params: bool + supports_phantom_validation: bool + limitations: list[str] = Field(default_factory=list) + missing_dependencies: list[str] = Field(default_factory=list) + install_hint: str | None = None + fidelity_level: FidelityLevel + + +class BackendHealth(BaseModel): + """Backend health check result.""" + + model_config = ConfigDict(extra="forbid") + + backend: str + status: BackendHealthStatus + checked_at: datetime = Field(default_factory=lambda: datetime.now(UTC)) + dependency_versions: dict[str, str] = Field(default_factory=dict) + errors: list[str] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) + install_hint: str | None = None + verification_command: str | None = None + + +SOFA_INSTALL_HINT = ( + "Install SOFA and SofaPython3 in the active Python environment, then verify " + "that either `import Sofa` or `import SofaRuntime` succeeds. 
See " + "docs/SOFA_BACKEND.md or docs/sofa_adapter.md for the backend boundary." +) + +SOFA_VERIFICATION_COMMAND = ( + "python3 -c \"import importlib; " + "print(importlib.import_module('Sofa').__name__)\"" +) + + +def placeholder_capabilities(info: Any) -> BackendCapabilities: + """Return honest production-facing capabilities for the placeholder backend.""" + + return BackendCapabilities( + name="placeholder", + version=str(getattr(info, "version", "unknown")), + available=True, + real_physics=False, + deterministic=True, + supports_contacts=False, + supports_tissue_deformation=False, + supports_tool_control=True, + supports_needle=True, + supports_replay=True, + supports_domain_randomization=True, + supports_policy_rollout=True, + supports_rl_env=False, + supports_gpu=False, + supports_fem=False, + supports_collision_forces=False, + supports_material_params=False, + supports_phantom_validation=False, + limitations=[ + "Deterministic infrastructure-validation backend only.", + "No biomechanical soft-tissue physics, force truth, image rendering, or FEM.", + "Synthetic contacts/deformation must be treated as labeled placeholders only.", + *list(getattr(info, "replay_limitations", [])), + ], + missing_dependencies=[], + install_hint=None, + fidelity_level=FidelityLevel.PLACEHOLDER_DETERMINISTIC, + ) + + +def sofa_capabilities(info: Any, availability: Any) -> BackendCapabilities: + """Return honest production-facing capabilities for the SOFA adapter.""" + + if not getattr(availability, "available", False): + return BackendCapabilities( + name="sofa", + version=str(getattr(info, "version", "unknown")), + available=False, + real_physics=False, + deterministic=False, + supports_contacts=False, + supports_tissue_deformation=False, + supports_tool_control=False, + supports_needle=False, + supports_replay=False, + supports_domain_randomization=False, + supports_policy_rollout=False, + supports_rl_env=False, + supports_gpu=None, + supports_fem=False, + 
supports_collision_forces=False, + supports_material_params=False, + supports_phantom_validation=False, + limitations=[ + "SOFA adapter and scene-plan mapping are present, but SOFA is not importable.", + "No SOFA physics, FEM, contact force, rendering, replay, or recorder run is " + "available in this environment.", + *list(getattr(info, "replay_limitations", [])), + ], + missing_dependencies=["SOFA/SofaPython3"], + install_hint=SOFA_INSTALL_HINT, + fidelity_level=FidelityLevel.SOFA_UNAVAILABLE_ADAPTER_ONLY, + ) + + module_name = getattr(availability, "module_name", None) or "SOFA" + return BackendCapabilities( + name="sofa", + version=str(getattr(info, "version", "unknown")), + available=True, + real_physics=False, + deterministic=False, + supports_contacts=False, + supports_tissue_deformation=False, + supports_tool_control=False, + supports_needle=True, + supports_replay=False, + supports_domain_randomization=True, + supports_policy_rollout=False, + supports_rl_env=False, + supports_gpu=None, + supports_fem=False, + supports_collision_forces=False, + supports_material_params=True, + supports_phantom_validation=False, + limitations=[ + f"{module_name} is importable, but MedSim currently exposes only the minimal " + "metadata/scene-plan adapter path.", + "SimulationBackend.step(), physics-derived state, recorder compatibility, and " + "deterministic replay are not implemented for SOFA.", + *list(getattr(info, "replay_limitations", [])), + ], + missing_dependencies=[], + install_hint=None, + fidelity_level=FidelityLevel.SOFA_MINIMAL_UNVALIDATED, + ) + + +def generic_capabilities_from_info(info: Any) -> BackendCapabilities: + """Best-effort capability conversion for custom backends.""" + + name = str(getattr(info, "name", "unknown")) + return BackendCapabilities( + name=name, + version=str(getattr(info, "version", "unknown")), + available=True, + real_physics=False, + deterministic=bool(getattr(info, "deterministic_reset_supported", False)), + 
supports_contacts=False, + supports_tissue_deformation=False, + supports_tool_control=_capability_supported(info, "action_serialization"), + supports_needle=False, + supports_replay=bool(getattr(info, "deterministic_action_replay_supported", False)), + supports_domain_randomization=_capability_supported( + info, + "perturbation_metadata_support", + ), + supports_policy_rollout=False, + supports_rl_env=False, + supports_gpu=None, + supports_fem=False, + supports_collision_forces=False, + supports_material_params=False, + supports_phantom_validation=False, + limitations=list(getattr(info, "replay_limitations", [])), + missing_dependencies=[], + install_hint=None, + fidelity_level=FidelityLevel.PLACEHOLDER_DETERMINISTIC + if name == "placeholder" + else FidelityLevel.SOFA_UNAVAILABLE_ADAPTER_ONLY, + ) + + +def health_from_capabilities( + capabilities: BackendCapabilities, + *, + dependency_versions: dict[str, str] | None = None, + errors: list[str] | None = None, + warnings: list[str] | None = None, + verification_command: str | None = None, +) -> BackendHealth: + """Build a health payload from capabilities and optional runtime details.""" + + explicit_errors = list(errors or []) + status = ( + BackendHealthStatus.AVAILABLE + if capabilities.available + else BackendHealthStatus.UNAVAILABLE + ) + if capabilities.available and (explicit_errors or warnings): + status = BackendHealthStatus.DEGRADED + return BackendHealth( + backend=capabilities.name, + status=status, + dependency_versions=dict(dependency_versions or {}), + errors=explicit_errors, + warnings=list(warnings or []), + install_hint=capabilities.install_hint, + verification_command=verification_command, + ) + + +def _capability_supported(info: Any, capability_value: str) -> bool: + capabilities = getattr(info, "capabilities", {}) + for capability, status in capabilities.items(): + value = str(getattr(capability, "value", capability)) + status_value = str(getattr(status, "value", status)) + if value == 
capability_value: + return status_value in {"supported", "experimental"} + return False diff --git a/src/medsim/sim/contact.py b/src/medsim/sim/contact.py new file mode 100644 index 0000000..fd487af --- /dev/null +++ b/src/medsim/sim/contact.py @@ -0,0 +1,7 @@ +"""Contact and collision config exports.""" + +from __future__ import annotations + +from medsim.config.models import CollisionParamsConfig + +__all__ = ["CollisionParamsConfig"] diff --git a/src/medsim/sim/errors.py b/src/medsim/sim/errors.py new file mode 100644 index 0000000..14accd0 --- /dev/null +++ b/src/medsim/sim/errors.py @@ -0,0 +1,23 @@ +"""Simulation backend error hierarchy.""" + +from __future__ import annotations + + +class BackendError(RuntimeError): + """Base error for backend selection, health, and runtime failures.""" + + +class BackendNotFoundError(BackendError): + """Raised when a backend name is not registered.""" + + +class BackendUnavailableError(BackendError): + """Raised when a backend exists but cannot run in the current environment.""" + + +class BackendScenarioValidationError(BackendError): + """Raised when a scenario is not valid for a backend.""" + + +class BackendExecutionError(BackendError): + """Raised when backend execution fails after setup succeeds.""" diff --git a/src/medsim/sim/instruments.py b/src/medsim/sim/instruments.py new file mode 100644 index 0000000..549571e --- /dev/null +++ b/src/medsim/sim/instruments.py @@ -0,0 +1,7 @@ +"""Instrument geometry/control config exports.""" + +from __future__ import annotations + +from medsim.config.models import ToolControlParamsConfig + +__all__ = ["ToolControlParamsConfig"] diff --git a/src/medsim/sim/materials.py b/src/medsim/sim/materials.py new file mode 100644 index 0000000..be4b048 --- /dev/null +++ b/src/medsim/sim/materials.py @@ -0,0 +1,7 @@ +"""Material config exports for simulation backends.""" + +from __future__ import annotations + +from medsim.config.models import MaterialPropertiesConfig, 
NeedleGeometryMaterialConfig + +__all__ = ["MaterialPropertiesConfig", "NeedleGeometryMaterialConfig"] diff --git a/src/medsim/sim/placeholder_backend.py b/src/medsim/sim/placeholder_backend.py index 84be6f4..cac4df8 100644 --- a/src/medsim/sim/placeholder_backend.py +++ b/src/medsim/sim/placeholder_backend.py @@ -11,6 +11,7 @@ import copy from random import Random +from medsim import __version__ from medsim.config.models import ScenarioConfig, SceneConfig from medsim.scenarios.perturbations import PerturbationEffects, derive_perturbation_effects from medsim.sim.backend import ( @@ -20,6 +21,12 @@ SimulationBackend, complete_capability_map, ) +from medsim.sim.capabilities import ( + BackendCapabilities, + BackendHealth, + health_from_capabilities, + placeholder_capabilities, +) from medsim.sim.events import SimEvent from medsim.sim.models import Action, Observation, StepResult from medsim.sim.state import ( @@ -84,6 +91,30 @@ def backend_info(self) -> BackendInfo: ], ) + def capabilities(self) -> BackendCapabilities: + """Return production-facing placeholder capabilities.""" + + return placeholder_capabilities(self.backend_info()) + + def health(self) -> BackendHealth: + """Return placeholder backend health.""" + + return health_from_capabilities( + self.capabilities(), + dependency_versions={"medsim": __version__}, + verification_command="python3 -m medsim.cli backends health placeholder", + ) + + def validate_scenario(self, scenario: ScenarioConfig) -> list[str]: + """Return placeholder-specific scenario validation issues.""" + + issues: list[str] = [] + if not scenario.schema_version: + issues.append("scenario.schema_version is required") + if not scenario.scenario_id: + issues.append("scenario.scenario_id is required") + return issues + def initialize(self, scene_config: SceneConfig) -> None: """Initialize backend resources from a scene config.""" diff --git a/src/medsim/sim/scenario_validation.py b/src/medsim/sim/scenario_validation.py new file mode 100644 
index 0000000..9f5c468 --- /dev/null +++ b/src/medsim/sim/scenario_validation.py @@ -0,0 +1,287 @@ +"""Config validation for scene/scenario files and API payloads.""" + +from __future__ import annotations + +import json +from enum import StrEnum +from pathlib import Path +from typing import Any + +import yaml +from pydantic import BaseModel, ConfigDict, Field, ValidationError + +from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.sim.backend_registry import get_backend_registry +from medsim.sim.capabilities import FidelityLevel + +SUPPORTED_SCHEMA_VERSIONS = {"0.1"} +PROHIBITED_USE_VALUES = { + "patient_care", + "clinical_decision_support", + "live_autonomous_surgery", + "prohibited_clinical_use", +} + + +class ValidationSeverity(StrEnum): + """Validation severity.""" + + ERROR = "error" + WARNING = "warning" + INFO = "info" + + +class ConfigKind(StrEnum): + """Supported config kinds.""" + + SCENE = "scene" + SCENARIO = "scenario" + + +class ValidationIssue(BaseModel): + """One config validation issue.""" + + model_config = ConfigDict(extra="forbid") + + severity: ValidationSeverity + code: str + message: str + location: list[str] = Field(default_factory=list) + + +class ConfigValidationReport(BaseModel): + """Structured config validation report.""" + + model_config = ConfigDict(extra="forbid") + + schema_version: str = "medsim.config_validation_report.v1" + valid: bool + kind: ConfigKind | None + config_schema_version: str | None = None + backend: str | None = None + fidelity_level: str | None = None + issues: list[ValidationIssue] = Field(default_factory=list) + + +def validate_config_path( + path: Path | str, + *, + kind: ConfigKind | str | None = None, + backend_name: str | None = None, +) -> ConfigValidationReport: + """Validate one YAML/JSON config file.""" + + payload = _read_mapping(Path(path)) + return validate_config_payload(payload, kind=kind, backend_name=backend_name) + + +def validate_config_payload( + payload: dict[str, Any], + 
*, + kind: ConfigKind | str | None = None, + backend_name: str | None = None, +) -> ConfigValidationReport: + """Validate one scene or scenario config payload.""" + + issues: list[ValidationIssue] = [] + resolved_kind = _resolve_kind(payload, kind, issues) + schema_version = payload.get("schema_version") + if schema_version not in SUPPORTED_SCHEMA_VERSIONS: + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="unknown_schema_version", + message=f"Unsupported schema_version: {schema_version!r}", + location=["schema_version"], + ) + ) + + _validate_claim_boundaries(payload, issues) + + backend: str | None = backend_name + fidelity_level: str | None = None + if resolved_kind == ConfigKind.SCENE: + scene = _validate_model(payload, SceneConfig, issues) + if scene is not None: + backend = scene.backend.name + backend_capabilities = get_backend_registry().capabilities(scene.backend.name) + fidelity_level = str(backend_capabilities.fidelity_level) + if backend_name is not None and backend_name != scene.backend.name: + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="backend_mismatch", + message=( + f"Requested backend {backend_name!r} does not match scene backend " + f"{scene.backend.name!r}." + ), + location=["backend", "name"], + ) + ) + if scene.backend.name == "sofa" and not backend_capabilities.available: + issues.append( + ValidationIssue( + severity=ValidationSeverity.WARNING, + code="backend_unavailable", + message="SOFA scene config is structurally valid, but SOFA is unavailable.", + location=["backend", "name"], + ) + ) + if ( + scene.tissue_material.validation_status == "clinical_validated" + and payload.get("clinical_validation_artifact") is None + ): + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="clinical_validation_artifact_required", + message=( + "clinical_validated material status requires explicit validation " + "artifact references." 
+ ), + location=["tissue_material", "validation_status"], + ) + ) + if scene.pilot.use_mode == "prohibited_clinical_use": + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="prohibited_use_mode", + message="prohibited_clinical_use is blocked.", + location=["pilot", "use_mode"], + ) + ) + if ( + scene.pilot.use_mode == "hospital_pilot_non_patient" + and not scene.pilot.risk_acknowledged + ): + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="risk_acknowledgement_required", + message="Hospital pilot non-patient mode requires risk acknowledgement.", + location=["pilot", "risk_acknowledged"], + ) + ) + elif resolved_kind == ConfigKind.SCENARIO: + scenario = _validate_model(payload, ScenarioConfig, issues) + if scenario is not None: + backend = backend_name + if scenario.safety_boundaries.prohibit_patient_use is False: + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="patient_use_boundary_disabled", + message="Scenario safety boundaries must prohibit patient use.", + location=["safety_boundaries", "prohibit_patient_use"], + ) + ) + else: + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="unknown_config_kind", + message="Could not determine whether payload is a scene or scenario config.", + ) + ) + + valid = not any(issue.severity == ValidationSeverity.ERROR for issue in issues) + return ConfigValidationReport( + valid=valid, + kind=resolved_kind, + config_schema_version=str(schema_version) if schema_version is not None else None, + backend=backend, + fidelity_level=fidelity_level, + issues=issues, + ) + + +def _read_mapping(path: Path) -> dict[str, Any]: + with path.expanduser().resolve().open("r", encoding="utf-8") as handle: + if path.suffix.lower() == ".json": + data = json.load(handle) + else: + data = yaml.safe_load(handle) + if not isinstance(data, dict): + raise ValueError(f"Expected config mapping in {path}") + return data + + +def 
_resolve_kind( + payload: dict[str, Any], + requested: ConfigKind | str | None, + issues: list[ValidationIssue], +) -> ConfigKind | None: + if requested is not None: + try: + return ConfigKind(str(requested)) + except ValueError: + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="invalid_requested_kind", + message=f"Unsupported config kind: {requested!r}", + ) + ) + return None + if "scene_id" in payload and "backend" in payload: + return ConfigKind.SCENE + if "scenario_id" in payload: + return ConfigKind.SCENARIO + return None + + +def _validate_model( + payload: dict[str, Any], + model_type: type[SceneConfig] | type[ScenarioConfig], + issues: list[ValidationIssue], +) -> SceneConfig | ScenarioConfig | None: + try: + return model_type.model_validate(payload) + except ValidationError as exc: + for error in exc.errors(): + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="schema_validation_error", + message=str(error["msg"]), + location=[str(item) for item in error["loc"]], + ) + ) + return None + + +def _validate_claim_boundaries(payload: dict[str, Any], issues: list[ValidationIssue]) -> None: + for key in ("use_mode", "intended_use"): + value = payload.get(key) + if value in PROHIBITED_USE_VALUES: + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="prohibited_use", + message=f"{key}={value!r} is prohibited for MedSim runs.", + location=[key], + ) + ) + + fidelity = payload.get("fidelity_level") + if fidelity == FidelityLevel.CLINICALLY_VALIDATED: + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + code="clinical_fidelity_blocked", + message="clinically_validated fidelity is blocked without clinical evidence.", + location=["fidelity_level"], + ) + ) + if fidelity == FidelityLevel.PHANTOM_VALIDATED and not payload.get( + "phantom_validation_artifact" + ): + issues.append( + ValidationIssue( + severity=ValidationSeverity.ERROR, + 
code="phantom_validation_required", + message="phantom_validated fidelity requires phantom comparison artifacts.", + location=["fidelity_level"], + ) + ) diff --git a/src/medsim/sim/sofa_backend.py b/src/medsim/sim/sofa_backend.py index e6a1550..801c76a 100644 --- a/src/medsim/sim/sofa_backend.py +++ b/src/medsim/sim/sofa_backend.py @@ -6,6 +6,7 @@ from __future__ import annotations +from medsim import __version__ from medsim.config.models import ScenarioConfig, SceneConfig from medsim.sim.backend import ( BackendCapability, @@ -14,6 +15,14 @@ SimulationBackend, complete_capability_map, ) +from medsim.sim.capabilities import ( + SOFA_INSTALL_HINT, + SOFA_VERIFICATION_COMMAND, + BackendCapabilities, + BackendHealth, + BackendHealthStatus, + sofa_capabilities, +) from medsim.sim.events import SimEvent from medsim.sim.models import Action, Observation, StepResult from medsim.sim.sofa.availability import SofaDependencyError, check_sofa_available @@ -95,6 +104,54 @@ def backend_info(self) -> BackendInfo: ], ) + def capabilities(self) -> BackendCapabilities: + """Return production-facing SOFA adapter capabilities.""" + + return sofa_capabilities(self.backend_info(), check_sofa_available()) + + def health(self) -> BackendHealth: + """Return dependency-aware SOFA health without importing SOFA globally.""" + + availability = check_sofa_available() + dependency_versions = {"medsim": __version__} + if availability.available: + dependency_versions[availability.module_name or "SOFA"] = ( + availability.version or "unknown" + ) + return BackendHealth( + backend="sofa", + status=BackendHealthStatus.DEGRADED, + dependency_versions=dependency_versions, + warnings=[ + "SOFA is importable, but MedSim SOFA stepping, recorder compatibility, " + "replay, and physics-derived state are not implemented yet.", + ], + install_hint=None, + verification_command=SOFA_VERIFICATION_COMMAND, + ) + return BackendHealth( + backend="sofa", + status=BackendHealthStatus.UNAVAILABLE, + 
dependency_versions=dependency_versions, + errors=[availability.error or "SOFA module not found."], + warnings=[ + "SOFA adapter and scene-plan mapping are available, but no SOFA runtime can run." + ], + install_hint=SOFA_INSTALL_HINT, + verification_command=SOFA_VERIFICATION_COMMAND, + ) + + def validate_scenario(self, scenario: ScenarioConfig) -> list[str]: + """Return SOFA-specific scenario validation issues.""" + + issues: list[str] = [] + availability = check_sofa_available() + if not availability.available: + issues.append("SOFA is not installed or importable in this environment.") + if not scenario.schema_version: + issues.append("scenario.schema_version is required") + return issues + def initialize(self, scene_config: SceneConfig) -> None: """Store scene config and verify SOFA availability for runtime work.""" diff --git a/src/medsim/sim/sofa_extractors.py b/src/medsim/sim/sofa_extractors.py new file mode 100644 index 0000000..6d4face --- /dev/null +++ b/src/medsim/sim/sofa_extractors.py @@ -0,0 +1,13 @@ +"""Compatibility exports for SOFA state extraction.""" + +from __future__ import annotations + +from medsim.sim.sofa.state_conversion import observation_from_state, snapshot_to_simulation_state +from medsim.sim.sofa.state_extractor import SofaStateExtractor, SofaStateSnapshot + +__all__ = [ + "SofaStateExtractor", + "SofaStateSnapshot", + "observation_from_state", + "snapshot_to_simulation_state", +] diff --git a/src/medsim/sim/sofa_materials.py b/src/medsim/sim/sofa_materials.py new file mode 100644 index 0000000..fdf5c96 --- /dev/null +++ b/src/medsim/sim/sofa_materials.py @@ -0,0 +1,41 @@ +"""Unvalidated SOFA material parameter placeholders.""" + +from __future__ import annotations + +from pydantic import BaseModel, ConfigDict, Field + + +class SofaMaterialParams(BaseModel): + """Material parameters for future SOFA scene builders. + + Defaults are intentionally marked unvalidated and must not be used as + phantom or clinical truth. 
+ """ + + model_config = ConfigDict(extra="forbid", frozen=True) + + schema_version: str = "medsim.sofa_material_params.v1" + material_id: str = "generic_unvalidated_tissue" + young_modulus_pa: float = Field(default=50_000.0, gt=0) + poisson_ratio: float = Field(default=0.45, ge=0.0, lt=0.5) + density_kg_m3: float = Field(default=1_050.0, gt=0) + damping: float = Field(default=0.05, ge=0) + friction: float = Field(default=0.3, ge=0) + contact_stiffness: float = Field(default=1_000.0, gt=0) + validation_status: str = "unvalidated_default" + physical_accuracy: bool = False + limitations: list[str] = Field( + default_factory=lambda: [ + "Default SOFA material parameters are placeholders.", + "Phantom comparison is required before claiming physical validity.", + ] + ) + + +def default_sofa_tissue_material() -> SofaMaterialParams: + """Return unvalidated default tissue material parameters.""" + + return SofaMaterialParams() + + +__all__ = ["SofaMaterialParams", "default_sofa_tissue_material"] diff --git a/src/medsim/sim/sofa_preflight.py b/src/medsim/sim/sofa_preflight.py new file mode 100644 index 0000000..8776b04 --- /dev/null +++ b/src/medsim/sim/sofa_preflight.py @@ -0,0 +1,121 @@ +"""SOFA backend preflight checks.""" + +from __future__ import annotations + +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + +from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.sim.capabilities import ( + SOFA_INSTALL_HINT, + SOFA_VERIFICATION_COMMAND, + BackendCapabilities, + BackendHealth, +) +from medsim.sim.sofa.availability import check_sofa_available +from medsim.sim.sofa.scene_builder import SofaSceneBuilder +from medsim.sim.sofa.scene_plan import SofaScenePlan, build_sofa_scene_plan +from medsim.sim.sofa_backend import SofaBackend + + +class SofaPreflightReport(BaseModel): + """Structured SOFA preflight result for CLI/API/docs.""" + + model_config = ConfigDict(extra="forbid") + + schema_version: str = "medsim.sofa_preflight.v1" 
+ backend: str = "sofa" + available: bool + module_name: str | None = None + module_version: str | None = None + health: BackendHealth + capabilities: BackendCapabilities + scene_plan_valid: bool + runtime_build_attempted: bool = False + runtime_build_succeeded: bool = False + canonical_scene_components: list[str] = Field(default_factory=list) + errors: list[str] = Field(default_factory=list) + warnings: list[str] = Field(default_factory=list) + install_hint: str = SOFA_INSTALL_HINT + verification_command: str = SOFA_VERIFICATION_COMMAND + scene_plan: dict[str, Any] | None = None + + +def run_sofa_preflight( + scene_config: SceneConfig, + scenario: ScenarioConfig, + *, + attempt_runtime: bool = False, +) -> SofaPreflightReport: + """Run import-safe SOFA preflight checks for one scene/scenario pair.""" + + backend = SofaBackend() + availability = check_sofa_available() + health = backend.health() + capabilities = backend.capabilities() + errors: list[str] = [] + warnings: list[str] = [] + scene_plan: SofaScenePlan | None = None + scene_plan_valid = False + + try: + scene_plan = build_sofa_scene_plan(scene_config, scenario) + scene_plan_valid = True + except Exception as exc: + errors.append(f"Scene plan build failed: {exc}") + + runtime_build_attempted = False + runtime_build_succeeded = False + if attempt_runtime: + runtime_build_attempted = True + if not availability.available: + warnings.append("Runtime build skipped because SOFA is unavailable.") + elif scene_plan is None: + warnings.append("Runtime build skipped because scene plan build failed.") + else: + try: + runtime = SofaSceneBuilder().build(scene_plan) + runtime_build_succeeded = True + runtime.close() + except Exception as exc: + errors.append(f"Runtime build failed: {exc}") + + if not availability.available: + errors.append(availability.error or "SOFA module not found.") + if scene_plan_valid: + warnings.append( + "Scene plan mapping is valid, but this does not prove SOFA physics execution." 
+ ) + if runtime_build_succeeded: + warnings.append( + "Minimal SOFA root scene built, but SimulationBackend.step() remains unimplemented." + ) + + return SofaPreflightReport( + available=availability.available, + module_name=availability.module_name, + module_version=availability.version, + health=health, + capabilities=capabilities, + scene_plan_valid=scene_plan_valid, + runtime_build_attempted=runtime_build_attempted, + runtime_build_succeeded=runtime_build_succeeded, + canonical_scene_components=_canonical_scene_components(scene_plan), + errors=errors, + warnings=warnings, + scene_plan=scene_plan.model_dump(mode="json") if scene_plan is not None else None, + ) + + +def _canonical_scene_components(scene_plan: SofaScenePlan | None) -> list[str]: + if scene_plan is None: + return [] + components = ["tissue", "needle", "camera"] + if scene_plan.tools: + components.append("tools") + if scene_plan.targets: + components.append("targets") + if scene_plan.scenario: + components.append("scenario") + return components diff --git a/src/medsim/sim/sofa_scene_builder.py b/src/medsim/sim/sofa_scene_builder.py new file mode 100644 index 0000000..10a6af0 --- /dev/null +++ b/src/medsim/sim/sofa_scene_builder.py @@ -0,0 +1,11 @@ +"""Compatibility exports for SOFA scene planning/building.""" + +from __future__ import annotations + +from medsim.sim.sofa.scene_builder import SofaSceneBuilder +from medsim.sim.sofa.scene_plan import ( + SofaScenePlan, + build_sofa_scene_plan, +) + +__all__ = ["SofaSceneBuilder", "SofaScenePlan", "build_sofa_scene_plan"] diff --git a/src/medsim/sim/tasks.py b/src/medsim/sim/tasks.py new file mode 100644 index 0000000..fc6eea1 --- /dev/null +++ b/src/medsim/sim/tasks.py @@ -0,0 +1,17 @@ +"""Task, safety, reward, and dataset config exports.""" + +from __future__ import annotations + +from medsim.config.models import ( + AutonomyRewardConfig, + DatasetSplitMetadataConfig, + SafetyBoundariesConfig, + SuccessCriteriaConfig, +) + +__all__ = [ + 
"AutonomyRewardConfig", + "DatasetSplitMetadataConfig", + "SafetyBoundariesConfig", + "SuccessCriteriaConfig", +] diff --git a/src/medsim/utils/paths.py b/src/medsim/utils/paths.py index cf2da3c..78f812a 100644 --- a/src/medsim/utils/paths.py +++ b/src/medsim/utils/paths.py @@ -10,6 +10,7 @@ from medsim import __version__ from medsim.config.models import ScenarioConfig, SceneConfig +from medsim.data.claims import artifact_claim_metadata from medsim.data.exporters import write_json from medsim.data.schema import RunManifest @@ -45,9 +46,11 @@ def write_config_snapshot( ) -> Path: """Write the scene and scenario configs used for a run.""" + claims = artifact_claim_metadata(scene_config.backend.name) path = run_dir / "config_snapshot.json" payload = { "schema_version": "medsim.config_snapshot.v1", + **claims, "scene": scene_config.model_dump(mode="json"), "scenarios": [scenario.model_dump(mode="json") for scenario in scenarios], } @@ -86,7 +89,15 @@ def write_run_manifest( ) -> Path: """Write machine-readable run provenance.""" + claims = artifact_claim_metadata(scene_config.backend.name) manifest = RunManifest( + **claims, + use_mode=scene_config.pilot.use_mode, + risk_acknowledged=scene_config.pilot.risk_acknowledged, + operator_id=scene_config.pilot.operator_id, + institution=scene_config.pilot.institution, + reviewer=scene_config.pilot.reviewer, + approved_protocol_id=scene_config.pilot.approved_protocol_id, run_id=run_dir.name, created_at_utc=datetime.now(UTC), medsim_version=__version__, diff --git a/src/medsim/validation/__init__.py b/src/medsim/validation/__init__.py new file mode 100644 index 0000000..6e8b67e --- /dev/null +++ b/src/medsim/validation/__init__.py @@ -0,0 +1,6 @@ +"""Phantom/bench validation workflow.""" + +from medsim.validation.compare import compare_phantom_run +from medsim.validation.phantom import synthetic_bench_fixture + +__all__ = ["compare_phantom_run", "synthetic_bench_fixture"] diff --git a/src/medsim/validation/bundle.py 
b/src/medsim/validation/bundle.py new file mode 100644 index 0000000..88795a8 --- /dev/null +++ b/src/medsim/validation/bundle.py @@ -0,0 +1,106 @@ +"""Validation bundle export.""" + +from __future__ import annotations + +import json +import shutil +from pathlib import Path +from typing import Any + +from medsim.data.exporters import write_json + +REQUIRED_BUNDLE_FILES = [ + "run_summary.json", + "scenario_resolved.json", + "backend_capabilities.json", + "environment.json", + "provenance.json", + "metrics.json", + "validation.json", + "replay_validation.json", + "artifact_manifest.json", + "checksums.json", +] + + +def export_validation_bundle(run_dir: Path | str, out_dir: Path | str) -> dict[str, Any]: + """Export a validation bundle for one run directory.""" + + run = Path(run_dir) + out = Path(out_dir) + out.mkdir(parents=True, exist_ok=True) + copied = [] + missing = [] + for name in REQUIRED_BUNDLE_FILES: + source = run / name + if source.exists(): + shutil.copy2(source, out / name) + copied.append(name) + else: + missing.append(name) + pilot_metadata = _pilot_metadata(run) + risk_disclosure = { + "schema_version": "medsim.risk_disclosure.v1", + "no_patient_use": True, + "not_clinically_validated": True, + "not_autonomous_surgery_ready": True, + "pilot_metadata": pilot_metadata, + } + fidelity_disclosure = { + "schema_version": "medsim.fidelity_disclosure.v1", + "backend": _read_json(run / "run_summary.json").get("backend", "unknown"), + "fidelity_level": _read_json(run / "run_summary.json").get("fidelity_level", "unknown"), + "physical_accuracy": _read_json(run / "run_summary.json").get("physical_accuracy", False), + } + write_json(out / "risk_disclosure.json", risk_disclosure) + write_json(out / "fidelity_disclosure.json", fidelity_disclosure) + report = { + "schema_version": "medsim.validation_bundle.v1", + "run_dir": str(run), + "bundle_dir": str(out), + "copied_files": copied, + "missing_files": missing, + "complete": not missing, + "pilot_metadata": 
pilot_metadata, + } + write_json(out / "validation_bundle_manifest.json", report) + _write_markdown_report(out / "validation_bundle_report.md", report, risk_disclosure) + return report + + +def _pilot_metadata(run_dir: Path) -> dict[str, Any]: + manifest = _read_json(run_dir / "run_manifest.json") + return { + "use_mode": manifest.get("use_mode", "research"), + "risk_acknowledged": manifest.get("risk_acknowledged", False), + "operator_id": manifest.get("operator_id"), + "institution": manifest.get("institution"), + "reviewer": manifest.get("reviewer"), + "approved_protocol_id": manifest.get("approved_protocol_id"), + } + + +def _read_json(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + with path.open("r", encoding="utf-8") as handle: + return json.load(handle) + + +def _write_markdown_report( + path: Path, + report: dict[str, Any], + risk: dict[str, Any], +) -> Path: + lines = [ + "# MedSim Validation Bundle", + "", + f"- Run: `{report['run_dir']}`", + f"- Complete: {report['complete']}", + f"- Missing files: {', '.join(report['missing_files']) or 'none'}", + f"- No patient use: {risk['no_patient_use']}", + f"- Not clinically validated: {risk['not_clinically_validated']}", + f"- Not autonomous surgery ready: {risk['not_autonomous_surgery_ready']}", + ] + path.write_text("\n".join(lines) + "\n", encoding="utf-8") + return path diff --git a/src/medsim/validation/compare.py b/src/medsim/validation/compare.py new file mode 100644 index 0000000..96383e7 --- /dev/null +++ b/src/medsim/validation/compare.py @@ -0,0 +1,92 @@ +"""Compare simulation run artifacts against phantom/bench trial data.""" + +from __future__ import annotations + +import json +import math +from pathlib import Path + +from medsim.data.exporters import write_json +from medsim.data.schema import EpisodeSummary, StepRecord +from medsim.validation.schemas import BenchTrialMetadata, PhantomComparisonReport + + +def compare_phantom_run( + sim_run: Path | str, + bench_data: Path | str | 
BenchTrialMetadata, + out_dir: Path | str, +) -> PhantomComparisonReport: + """Compare one simulation run with one phantom/bench trial fixture.""" + + run_dir = Path(sim_run) + bench = _load_bench(bench_data) + records = _load_records(run_dir) + summary = _load_first_summary(run_dir) + sim_traj = [record.needle_pose[:3] for record in records] + trajectory_rmse = _trajectory_rmse(sim_traj, bench.recorded_trajectory) + endpoint_error = _distance(sim_traj[-1], bench.recorded_trajectory[-1]) if sim_traj else None + deformation_error = None + if bench.observed_deformation_proxy is not None: + deformation_error = abs(float(bench.observed_deformation_proxy) - 0.0) + report = PhantomComparisonReport( + sim_run=str(run_dir), + bench_trial_id=bench.trial_id, + trajectory_rmse=trajectory_rmse, + endpoint_error=endpoint_error, + needle_pose_error=endpoint_error, + contact_timing_error=None if not bench.observed_contact_timing else 0.0, + deformation_proxy_error=deformation_error, + task_success_agreement=(summary.outcome_label == "success"), + timing_difference=None, + safety_event_agreement=None, + phantom_validated_claim_allowed=False, + limitations=[ + "Synthetic fixture comparison is pipeline validation only.", + "Phantom-validated fidelity requires real bench data and acceptance criteria.", + ], + ) + out = Path(out_dir) + out.mkdir(parents=True, exist_ok=True) + write_json(out / "phantom_comparison.json", report) + write_json( + out / "phantom_report.json", + { + "schema_version": "medsim.phantom_report.v1", + "report": report.model_dump(mode="json"), + }, + ) + return report + + +def _load_bench(path_or_model: Path | str | BenchTrialMetadata) -> BenchTrialMetadata: + if isinstance(path_or_model, BenchTrialMetadata): + return path_or_model + with Path(path_or_model).open("r", encoding="utf-8") as handle: + return BenchTrialMetadata.model_validate(json.load(handle)) + + +def _load_records(run_dir: Path) -> list[StepRecord]: + records = [] + for path in sorted((run_dir / 
"episodes").glob("episode_*.jsonl")): + for line in path.read_text(encoding="utf-8").splitlines(): + if line.strip(): + records.append(StepRecord.model_validate_json(line)) + return records + + +def _load_first_summary(run_dir: Path) -> EpisodeSummary: + path = sorted((run_dir / "episodes").glob("episode_*_summary.json"))[0] + with path.open("r", encoding="utf-8") as handle: + return EpisodeSummary.model_validate(json.load(handle)) + + +def _trajectory_rmse(left: list[list[float]], right: list[list[float]]) -> float | None: + if not left or not right: + return None + count = min(len(left), len(right)) + squared = [_distance(left[i], right[i]) ** 2 for i in range(count)] + return math.sqrt(sum(squared) / count) + + +def _distance(left: list[float], right: list[float]) -> float: + return math.sqrt(sum((float(a) - float(b)) ** 2 for a, b in zip(left, right, strict=True))) diff --git a/src/medsim/validation/phantom.py b/src/medsim/validation/phantom.py new file mode 100644 index 0000000..06d615a --- /dev/null +++ b/src/medsim/validation/phantom.py @@ -0,0 +1,26 @@ +"""Synthetic phantom fixture helpers.""" + +from __future__ import annotations + +from medsim.validation.schemas import BenchTrialMetadata + + +def synthetic_bench_fixture() -> BenchTrialMetadata: + """Return a tiny synthetic bench fixture for validation pipeline tests.""" + + return BenchTrialMetadata( + phantom_material="synthetic_gel_fixture", + phantom_geometry={"patch_mm": [60.0, 40.0, 3.0]}, + camera_tracker_system="synthetic_tracker", + calibration_metadata={"status": "synthetic_fixture"}, + operator="fixture", + instrument_setup={"tool": "needle_driver"}, + trial_id="bench_fixture_001", + task_type="needle_passing", + recorded_trajectory=[[0.0, -22.0, 12.0], [12.0, 0.0, 8.0]], + target_points=[[-12.0, 0.0, 8.0], [0.0, 8.0, 8.0], [12.0, 0.0, 8.0]], + observed_deformation_proxy=0.0, + observed_contact_timing=[], + notes="Synthetic fixture for pipeline validation only.", + 
data_quality_flags=["synthetic"], + ) diff --git a/src/medsim/validation/schemas.py b/src/medsim/validation/schemas.py new file mode 100644 index 0000000..20afea8 --- /dev/null +++ b/src/medsim/validation/schemas.py @@ -0,0 +1,47 @@ +"""Schemas for phantom/bench validation.""" + +from __future__ import annotations + +from datetime import UTC, datetime +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field + + +class ValidationModel(BaseModel): + model_config = ConfigDict(extra="forbid") + + +class BenchTrialMetadata(ValidationModel): + schema_version: str = "medsim.bench_trial.v1" + phantom_material: str + phantom_geometry: dict[str, Any] + camera_tracker_system: str + calibration_metadata: dict[str, Any] + operator: str + instrument_setup: dict[str, Any] + trial_id: str + task_type: str + recorded_trajectory: list[list[float]] + target_points: list[list[float]] + observed_deformation_proxy: float | None = None + observed_contact_timing: list[float] = Field(default_factory=list) + notes: str = "" + data_quality_flags: list[str] = Field(default_factory=list) + + +class PhantomComparisonReport(ValidationModel): + schema_version: str = "medsim.phantom_comparison.v1" + created_at_utc: datetime = Field(default_factory=lambda: datetime.now(UTC)) + sim_run: str + bench_trial_id: str + trajectory_rmse: float | None + endpoint_error: float | None + needle_pose_error: float | None + contact_timing_error: float | None + deformation_proxy_error: float | None + task_success_agreement: bool | None + timing_difference: float | None + safety_event_agreement: bool | None + phantom_validated_claim_allowed: bool = False + limitations: list[str] = Field(default_factory=list) diff --git a/src/medsim/workbench/session.py b/src/medsim/workbench/session.py index 79b33cb..d340cdd 100644 --- a/src/medsim/workbench/session.py +++ b/src/medsim/workbench/session.py @@ -26,8 +26,11 @@ from typing import Any from medsim.config.models import ScenarioConfig, SceneConfig +from 
medsim.data.artifacts import write_run_artifact_bundle +from medsim.data.claims import artifact_claim_metadata from medsim.data.recorder import EpisodeRecorder from medsim.data.schema import EpisodeSummary +from medsim.eval.metrics import aggregate_summaries from medsim.scenarios.perturbations import derive_perturbation_effects from medsim.sim.env import SurgicalEnv from medsim.sim.models import StepResult @@ -220,6 +223,7 @@ def create( env = SurgicalEnv(backend, self._scene_config) task = NeedlePassingTask.from_scene_config(self._scene_config) effects = derive_perturbation_effects(scenario, seed) + claims = artifact_claim_metadata(backend_name) recorder = EpisodeRecorder( episode_id=episode_id, seed=seed, @@ -227,6 +231,7 @@ def create( output_dir=episode_dir, perturbation_effects=effects, config_snapshot=config_snapshot, + artifact_metadata=claims, ) session._env = env session._task = task @@ -339,6 +344,17 @@ def _finalize(self, session: RunSession) -> None: return final_state = session._env.get_state() summary = session._recorder.finalize_and_export(final_state) + metrics = { + **artifact_claim_metadata(session.backend_name), + **aggregate_summaries([summary]), + } + write_run_artifact_bundle( + session.run_dir, + session.scene_config, + [session.scenario], + [summary], + metrics, + ) session.summary = summary session.status = RunStatus.COMPLETED session.completed_at_utc = datetime.now(UTC) diff --git a/tests/test_artifact_bundle_and_dataset.py b/tests/test_artifact_bundle_and_dataset.py new file mode 100644 index 0000000..8cc4f08 --- /dev/null +++ b/tests/test_artifact_bundle_and_dataset.py @@ -0,0 +1,74 @@ +import json +from pathlib import Path + +from medsim.cli import generate_dataset +from medsim.data.dataset import dataset_index, dataset_summary, export_dataset, validate_dataset + +ROOT = Path(__file__).resolve().parents[1] + + +def test_placeholder_run_writes_phase5_artifact_bundle(tmp_path: Path) -> None: + run_dir = generate_dataset( + scene_path=ROOT 
/ "configs" / "base_scene.yaml", + scenario_paths=[ROOT / "configs" / "scenarios" / "normal.yaml"], + episodes=1, + seed=91, + output_root=tmp_path / "runs", + ) + + required = [ + "run_summary.json", + "scenario_resolved.json", + "backend_capabilities.json", + "environment.json", + "provenance.json", + "metrics.json", + "validation.json", + "replay_validation.json", + "states.jsonl", + "events.jsonl", + "commands.jsonl", + "contacts.jsonl", + "observations.jsonl", + "actions.jsonl", + "rewards.jsonl", + "artifact_manifest.json", + "checksums.json", + ] + for name in required: + assert (run_dir / name).exists(), name + + manifest = json.loads((run_dir / "artifact_manifest.json").read_text(encoding="utf-8")) + checksums = json.loads((run_dir / "checksums.json").read_text(encoding="utf-8")) + assert manifest["backend"] == "placeholder" + assert manifest["artifact_count"] >= len(required) - 1 + assert "run_summary.json" in checksums["files"] + + +def test_dataset_index_export_and_validation(tmp_path: Path) -> None: + runs_root = tmp_path / "runs" + generate_dataset( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_paths=[ + ROOT / "configs" / "scenarios" / "normal.yaml", + ROOT / "configs" / "scenarios" / "camera_occlusion.yaml", + ], + episodes=2, + seed=92, + output_root=runs_root, + ) + out_dir = tmp_path / "dataset" + + index = dataset_index(runs_root) + summary = dataset_summary(runs_root) + manifest = export_dataset(runs_root, out_dir) + validation = validate_dataset(out_dir) + + assert index["run_count"] == 1 + assert summary["episode_count"] == 2 + assert manifest["record_count"] > 0 + assert validation["valid"] is True + assert (out_dir / "episodes.jsonl").exists() + assert (out_dir / "imitation_learning.jsonl").exists() + assert (out_dir / "rl_transitions.jsonl").exists() + assert (out_dir / "metadata.csv").exists() diff --git a/tests/test_autonomy.py b/tests/test_autonomy.py new file mode 100644 index 0000000..dd4ff32 --- /dev/null +++ 
b/tests/test_autonomy.py @@ -0,0 +1,67 @@ +from pathlib import Path + +from medsim.autonomy.benchmark import run_autonomy_benchmark +from medsim.autonomy.export import export_demonstrations +from medsim.autonomy.safety import SafetyShield +from medsim.autonomy.schemas import AutonomyAction +from medsim.config.loader import load_scenario_config, load_scene_config +from medsim.sim.env import SurgicalEnv +from medsim.sim.placeholder_backend import PlaceholderBackend + +ROOT = Path(__file__).resolve().parents[1] + + +def test_safety_shield_rejects_excessive_delta() -> None: + scene = load_scene_config(ROOT / "configs" / "base_scene.yaml") + scenario = load_scenario_config(ROOT / "configs" / "scenarios" / "normal.yaml") + env = SurgicalEnv(PlaceholderBackend(), scene) + env.reset(seed=1, scenario=scenario) + shield = SafetyShield(scene) + + action, events = shield.filter_action( + AutonomyAction(tool_delta_position={"right_driver": [99.0, 0.0, 0.0]}), + env.get_state(), + ) + + assert action.tool_deltas == {} + assert events + assert events[0].rejected is True + env.close() + + +def test_autonomy_benchmark_writes_artifacts(tmp_path: Path) -> None: + out_dir = tmp_path / "autonomy_run" + + payload = run_autonomy_benchmark( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_path=ROOT / "configs" / "scenarios" / "normal.yaml", + backend_name="placeholder", + policy_name="scripted", + episodes=1, + out_dir=out_dir, + ) + + assert payload["metrics"]["simulation_only"] is True + assert (out_dir / "policy_rollout.json").exists() + assert (out_dir / "observations.jsonl").exists() + assert (out_dir / "actions.jsonl").exists() + assert (out_dir / "rewards.jsonl").exists() + assert (out_dir / "safety_events.jsonl").exists() + assert (out_dir / "autonomy_metrics.json").exists() + + +def test_autonomy_export_demos(tmp_path: Path) -> None: + runs = tmp_path / "runs" + run_autonomy_benchmark( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_path=ROOT / 
"configs" / "scenarios" / "normal.yaml", + backend_name="placeholder", + policy_name="scripted", + episodes=1, + out_dir=runs / "autonomy_demo", + ) + + manifest = export_demonstrations(runs, tmp_path / "dataset") + + assert manifest["row_count"] > 0 + assert (tmp_path / "dataset" / "demo_policy.jsonl").exists() diff --git a/tests/test_backend_registry.py b/tests/test_backend_registry.py new file mode 100644 index 0000000..1ca314b --- /dev/null +++ b/tests/test_backend_registry.py @@ -0,0 +1,62 @@ +from medsim.sim.backend_registry import get_backend_registry +from medsim.sim.base import BackendProtocol +from medsim.sim.capabilities import BackendHealthStatus, FidelityLevel +from medsim.sim.errors import BackendNotFoundError +from medsim.sim.placeholder_backend import PlaceholderBackend +from medsim.sim.sofa.availability import check_sofa_available + + +def test_default_backend_registry_lists_builtin_backends() -> None: + registry = get_backend_registry() + + assert registry.names() == ["placeholder", "sofa"] + assert isinstance(registry.create("placeholder"), PlaceholderBackend) + assert isinstance(registry.create("placeholder"), BackendProtocol) + + +def test_placeholder_capability_and_health_surface_is_honest() -> None: + registry = get_backend_registry() + capabilities = registry.capabilities("placeholder") + health = registry.health("placeholder") + + assert capabilities.name == "placeholder" + assert capabilities.available is True + assert capabilities.real_physics is False + assert capabilities.deterministic is True + assert capabilities.supports_replay is True + assert capabilities.supports_tissue_deformation is False + assert capabilities.fidelity_level == FidelityLevel.PLACEHOLDER_DETERMINISTIC + assert health.backend == "placeholder" + assert health.status == BackendHealthStatus.AVAILABLE + + +def test_sofa_capability_and_health_report_missing_dependency_without_crashing() -> None: + registry = get_backend_registry() + capabilities = 
registry.capabilities("sofa") + health = registry.health("sofa") + availability = check_sofa_available() + + assert capabilities.name == "sofa" + assert capabilities.real_physics is False + assert health.backend == "sofa" + if availability.available: + assert capabilities.available is True + assert capabilities.fidelity_level == FidelityLevel.SOFA_MINIMAL_UNVALIDATED + assert health.status == BackendHealthStatus.DEGRADED + else: + assert capabilities.available is False + assert "SOFA/SofaPython3" in capabilities.missing_dependencies + assert capabilities.fidelity_level == FidelityLevel.SOFA_UNAVAILABLE_ADAPTER_ONLY + assert health.status == BackendHealthStatus.UNAVAILABLE + assert health.install_hint + + +def test_unknown_backend_raises_clear_error() -> None: + registry = get_backend_registry() + + try: + registry.create("missing") + except BackendNotFoundError as exc: + assert "Registered backends" in str(exc) + else: + raise AssertionError("Expected BackendNotFoundError") diff --git a/tests/test_eval_runner.py b/tests/test_eval_runner.py new file mode 100644 index 0000000..e5d5642 --- /dev/null +++ b/tests/test_eval_runner.py @@ -0,0 +1,67 @@ +from pathlib import Path + +from medsim.cli import benchmark_command, generate_dataset, replay_validate +from medsim.eval.runner import compare_runs, run_eval_report + +ROOT = Path(__file__).resolve().parents[1] + + +def test_run_eval_report_writes_phase6_artifacts(tmp_path: Path) -> None: + run_dir = generate_dataset( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_paths=[ROOT / "configs" / "scenarios" / "normal.yaml"], + episodes=1, + seed=101, + output_root=tmp_path / "runs", + ) + replay_validate(artifacts_dir=run_dir) + out_dir = tmp_path / "eval" / "eval_test" + + summary = run_eval_report(run_dir, out_dir) + + assert summary["run_count"] == 1 + assert (out_dir / "eval_summary.json").exists() + assert (out_dir / "eval_report.md").exists() + assert (out_dir / "runs.csv").exists() + assert (out_dir / 
"failures.json").exists() + assert (out_dir / "metric_distributions.json").exists() + + +def test_compare_runs_reports_metric_deltas(tmp_path: Path) -> None: + first = generate_dataset( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_paths=[ROOT / "configs" / "scenarios" / "normal.yaml"], + episodes=1, + seed=102, + output_root=tmp_path / "runs", + ) + second = generate_dataset( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_paths=[ROOT / "configs" / "scenarios" / "tool_collision.yaml"], + episodes=1, + seed=2, + output_root=tmp_path / "runs", + ) + + comparison = compare_runs(first, second) + + assert comparison["run_a"] == first.name + assert comparison["run_b"] == second.name + assert "success_rate_delta" in comparison + assert comparison["failure_count_delta"] >= 0 + + +def test_placeholder_benchmark_command_runs(tmp_path: Path, monkeypatch) -> None: + monkeypatch.chdir(tmp_path) + + payload = benchmark_command( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_path=ROOT / "configs" / "scenarios" / "normal.yaml", + backend_name="placeholder", + episodes=1, + ) + + assert payload["backend"] == "placeholder" + assert payload["episodes"] == 1 + assert Path(payload["run_dir"]).exists() + assert Path(payload["eval_dir"]).exists() diff --git a/tests/test_manifest_and_taxonomy.py b/tests/test_manifest_and_taxonomy.py index 9319b47..39eb44a 100644 --- a/tests/test_manifest_and_taxonomy.py +++ b/tests/test_manifest_and_taxonomy.py @@ -3,7 +3,7 @@ from medsim.cli import generate_dataset from medsim.config.loader import load_scenario_config, load_scene_config -from medsim.data.schema import RunManifest +from medsim.data.schema import EpisodeSummary, RunManifest, StepRecord from medsim.sim.env import SurgicalEnv from medsim.sim.placeholder_backend import PlaceholderBackend from medsim.sim.taxonomy import CANONICAL_EVENT_TYPES, CANONICAL_FAILURE_REASONS @@ -31,6 +31,70 @@ def test_run_manifest_is_written(tmp_path: Path) -> None: 
assert manifest.seed_start == 80 assert manifest.episode_count == 1 assert manifest.artifacts["episodes_dir"] == "episodes" + assert manifest.backend == "placeholder" + assert manifest.fidelity_level == "placeholder_deterministic" + assert manifest.physical_accuracy is False + assert manifest.intended_use == "infrastructure_validation" + assert manifest.use_mode == "research" + assert manifest.risk_acknowledged is False + + +def test_placeholder_run_artifacts_include_claim_boundary_metadata(tmp_path: Path) -> None: + run_dir = generate_dataset( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_paths=[ROOT / "configs" / "scenarios" / "normal.yaml"], + episodes=1, + seed=81, + output_root=tmp_path / "runs", + ) + + with (run_dir / "config_snapshot.json").open("r", encoding="utf-8") as handle: + snapshot = json.load(handle) + with (run_dir / "aggregate_metrics.json").open("r", encoding="utf-8") as handle: + aggregate = json.load(handle) + with (run_dir / "episodes" / "episode_0001_summary.json").open( + "r", + encoding="utf-8", + ) as handle: + summary = EpisodeSummary.model_validate(json.load(handle)) + first_record = StepRecord.model_validate_json( + (run_dir / "episodes" / "episode_0001.jsonl").read_text(encoding="utf-8").splitlines()[0] + ) + + artifacts = [snapshot, aggregate, summary.model_dump(mode="json"), first_record.model_dump()] + for artifact in artifacts: + assert artifact["backend"] == "placeholder" + assert artifact["fidelity_level"] == "placeholder_deterministic" + assert artifact["physical_accuracy"] is False + assert artifact["intended_use"] == "infrastructure_validation" + assert "patient_care" in artifact["disallowed_uses"] + assert first_record.deformation_summary["source"] == "placeholder_synthetic" + assert first_record.deformation_summary["physical_accuracy"] is False + assert snapshot["scene"]["pilot"]["use_mode"] == "research" + assert aggregate["backend"] == "placeholder" + + +def 
test_placeholder_collision_records_are_labeled_synthetic(tmp_path: Path) -> None: + run_dir = generate_dataset( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_paths=[ROOT / "configs" / "scenarios" / "tool_collision.yaml"], + episodes=1, + seed=2, + output_root=tmp_path / "runs", + ) + + records = [ + StepRecord.model_validate_json(line) + for line in (run_dir / "episodes" / "episode_0001.jsonl") + .read_text(encoding="utf-8") + .splitlines() + ] + contacts = [contact for record in records for contact in record.contacts] + + assert contacts + assert all(contact["source"] == "placeholder_synthetic" for contact in contacts) + assert all(contact["physical_accuracy"] is False for contact in contacts) + assert all(contact["force_n"] is None for contact in contacts) def test_placeholder_events_and_config_failures_are_canonical() -> None: diff --git a/tests/test_phantom_validation.py b/tests/test_phantom_validation.py new file mode 100644 index 0000000..ae16515 --- /dev/null +++ b/tests/test_phantom_validation.py @@ -0,0 +1,28 @@ +from pathlib import Path + +from medsim.cli import generate_dataset +from medsim.data.exporters import write_json +from medsim.validation.compare import compare_phantom_run +from medsim.validation.phantom import synthetic_bench_fixture + +ROOT = Path(__file__).resolve().parents[1] + + +def test_synthetic_phantom_comparison_generates_report(tmp_path: Path) -> None: + run_dir = generate_dataset( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_paths=[ROOT / "configs" / "scenarios" / "normal.yaml"], + episodes=1, + seed=111, + output_root=tmp_path / "runs", + ) + bench = synthetic_bench_fixture() + bench_path = tmp_path / "bench.json" + write_json(bench_path, bench) + + report = compare_phantom_run(run_dir, bench_path, tmp_path / "validation") + + assert report.bench_trial_id == bench.trial_id + assert report.trajectory_rmse is not None + assert report.phantom_validated_claim_allowed is False + assert (tmp_path / 
"validation" / "phantom_comparison.json").exists() diff --git a/tests/test_replay_validation.py b/tests/test_replay_validation.py index b61a931..0cd765a 100644 --- a/tests/test_replay_validation.py +++ b/tests/test_replay_validation.py @@ -25,6 +25,9 @@ def test_replay_validation_passes_and_actions_are_recorded(tmp_path: Path) -> No assert payload["aggregate"]["pass_count"] == 2 first_validation = ReplayValidationResult.model_validate(payload["results"][0]) assert first_validation.passed + assert first_validation.backend == "placeholder" + assert first_validation.fidelity_level == "placeholder_deterministic" + assert first_validation.physical_accuracy is False first_log = run_dir / "episodes" / "episode_0001.jsonl" records = [ diff --git a/tests/test_robotics_dry_run.py b/tests/test_robotics_dry_run.py new file mode 100644 index 0000000..93b5fbe --- /dev/null +++ b/tests/test_robotics_dry_run.py @@ -0,0 +1,26 @@ +from medsim.robotics import DryRunRobotAdapter, RobotCommand +from medsim.robotics.calibration import CalibrationRequirement + + +def test_dry_run_preview_never_actuates() -> None: + adapter = DryRunRobotAdapter() + preview = adapter.preview(RobotCommand(tool_id="right_driver", delta_mm=[1.0, 0.0, 0.0])) + + assert preview.would_actuate is False + assert preview.no_patient_use is True + assert any("dry-run" in message for message in preview.messages) + + +def test_dry_run_safety_rejects_excessive_command() -> None: + adapter = DryRunRobotAdapter() + preview = adapter.preview(RobotCommand(tool_id="right_driver", delta_mm=[99.0, 0.0, 0.0])) + + assert preview.accepted is False + assert any("max delta" in message for message in preview.messages) + + +def test_calibration_requirement_defaults_are_strict() -> None: + requirement = CalibrationRequirement() + + assert requirement.workspace_registration_required + assert requirement.emergency_stop_test_required diff --git a/tests/test_scenario_validation.py b/tests/test_scenario_validation.py new file mode 100644 
index 0000000..b37740e --- /dev/null +++ b/tests/test_scenario_validation.py @@ -0,0 +1,71 @@ +from pathlib import Path + +import yaml + +from medsim.config.loader import load_scene_config +from medsim.sim.scenario_validation import validate_config_path, validate_config_payload + +ROOT = Path(__file__).resolve().parents[1] + + +def test_scene_config_defaults_include_production_schema_sections() -> None: + scene = load_scene_config(ROOT / "configs" / "base_scene.yaml") + + assert scene.tissue_material.young_modulus_pa > 0 + assert scene.solver.iterations > 0 + assert scene.collision.enabled is True + assert scene.needle.radius_mm > 0 + assert scene.tool_control.max_delta_mm > 0 + assert scene.safety_boundaries.prohibit_patient_use is True + + +def test_validate_config_path_accepts_existing_scene_and_scenario() -> None: + scene_report = validate_config_path(ROOT / "configs" / "base_scene.yaml") + scenario_report = validate_config_path(ROOT / "configs" / "scenarios" / "normal.yaml") + + assert scene_report.valid + assert scene_report.kind == "scene" + assert scene_report.backend == "placeholder" + assert scene_report.fidelity_level == "placeholder_deterministic" + assert scenario_report.valid + assert scenario_report.kind == "scenario" + + +def test_validate_config_catches_invalid_ranges(tmp_path: Path) -> None: + payload = yaml.safe_load((ROOT / "configs" / "base_scene.yaml").read_text()) + payload["tissue_material"] = {"poisson_ratio": 0.8} + path = tmp_path / "invalid_scene.yaml" + path.write_text(yaml.safe_dump(payload), encoding="utf-8") + + report = validate_config_path(path) + + assert not report.valid + assert any(issue.code == "schema_validation_error" for issue in report.issues) + assert any(issue.location == ["tissue_material", "poisson_ratio"] for issue in report.issues) + + +def test_validate_config_blocks_unsafe_claims() -> None: + payload = { + "schema_version": "0.1", + "scenario_id": "unsafe", + "use_mode": "patient_care", + "fidelity_level": 
"clinically_validated", + "perturbations": [], + } + + report = validate_config_payload(payload, kind="scenario") + + assert not report.valid + codes = {issue.code for issue in report.issues} + assert "prohibited_use" in codes + assert "clinical_fidelity_blocked" in codes + + +def test_validate_config_requires_pilot_risk_acknowledgement() -> None: + payload = yaml.safe_load((ROOT / "configs" / "base_scene.yaml").read_text()) + payload["pilot"] = {"use_mode": "hospital_pilot_non_patient", "risk_acknowledged": False} + + report = validate_config_payload(payload, kind="scene") + + assert not report.valid + assert any(issue.code == "risk_acknowledgement_required" for issue in report.issues) diff --git a/tests/test_sofa_adapter_skeleton.py b/tests/test_sofa_adapter_skeleton.py index da6c3f5..a732947 100644 --- a/tests/test_sofa_adapter_skeleton.py +++ b/tests/test_sofa_adapter_skeleton.py @@ -21,6 +21,10 @@ from medsim.sim.sofa.scene_plan import MANDATORY_SOFA_STATE_FIELDS from medsim.sim.sofa.state_conversion import observation_from_state, snapshot_to_simulation_state from medsim.sim.sofa_backend import SofaBackend +from medsim.sim.sofa_extractors import SofaStateExtractor as TopLevelSofaStateExtractor +from medsim.sim.sofa_materials import default_sofa_tissue_material +from medsim.sim.sofa_preflight import run_sofa_preflight +from medsim.sim.sofa_scene_builder import build_sofa_scene_plan as top_level_scene_plan_builder from medsim.sim.state import SimulationState from medsim.sim.taxonomy import EventType @@ -93,6 +97,36 @@ def test_sofa_scene_plan_maps_current_config_without_sofa() -> None: assert payload["scenario"]["scenario_id"] == "camera_occlusion" +def test_sofa_preflight_reports_missing_dependency_without_crashing() -> None: + scene = load_scene_config(ROOT / "configs" / "base_scene.yaml") + scenario = load_scenario_config(ROOT / "configs" / "scenarios" / "normal.yaml") + + report = run_sofa_preflight(scene, scenario, attempt_runtime=True) + + assert 
report.backend == "sofa" + assert report.scene_plan_valid + assert "tissue" in report.canonical_scene_components + assert "needle" in report.canonical_scene_components + assert report.capabilities.real_physics is False + if not check_sofa_available().available: + assert report.available is False + assert report.runtime_build_succeeded is False + assert report.install_hint + assert report.errors + + +def test_top_level_sofa_compatibility_modules_export_current_contracts() -> None: + scene = load_scene_config(ROOT / "configs" / "base_scene.yaml") + scenario = load_scenario_config(ROOT / "configs" / "scenarios" / "normal.yaml") + plan = top_level_scene_plan_builder(scene, scenario) + material = default_sofa_tissue_material() + + assert plan.scene_id == scene.scene_id + assert TopLevelSofaStateExtractor().required_state_fields() + assert material.validation_status == "unvalidated_default" + assert material.physical_accuracy is False + + def test_sofa_backend_scene_plan_only_workflow() -> None: scene = load_scene_config(ROOT / "configs" / "base_scene.yaml") scenario = load_scenario_config(ROOT / "configs" / "scenarios" / "normal.yaml") diff --git a/tests/test_validation_bundle.py b/tests/test_validation_bundle.py new file mode 100644 index 0000000..3d5596d --- /dev/null +++ b/tests/test_validation_bundle.py @@ -0,0 +1,25 @@ +from pathlib import Path + +from medsim.cli import generate_dataset +from medsim.validation.bundle import export_validation_bundle + +ROOT = Path(__file__).resolve().parents[1] + + +def test_validation_bundle_exports_required_files(tmp_path: Path) -> None: + run_dir = generate_dataset( + scene_path=ROOT / "configs" / "base_scene.yaml", + scenario_paths=[ROOT / "configs" / "scenarios" / "normal.yaml"], + episodes=1, + seed=121, + output_root=tmp_path / "runs", + ) + + report = export_validation_bundle(run_dir, tmp_path / "bundle") + + assert report["complete"] is True + assert (tmp_path / "bundle" / "validation_bundle_manifest.json").exists() + 
assert (tmp_path / "bundle" / "risk_disclosure.json").exists() + assert (tmp_path / "bundle" / "fidelity_disclosure.json").exists() + assert (tmp_path / "bundle" / "validation_bundle_report.md").exists() + assert report["pilot_metadata"]["use_mode"] == "research" diff --git a/tests/test_workbench_api.py b/tests/test_workbench_api.py index 8865488..d244937 100644 --- a/tests/test_workbench_api.py +++ b/tests/test_workbench_api.py @@ -28,6 +28,37 @@ def test_health_endpoint(client: TestClient) -> None: assert "medsim_version" in payload +def test_production_health_and_backend_endpoints(client: TestClient) -> None: + root_health = client.get("/health") + assert root_health.status_code == 200 + assert root_health.json()["status"] == "ok" + + dependencies = client.get("/api/dependencies") + assert dependencies.status_code == 200 + assert "sofa" in dependencies.json() + + backends = client.get("/api/backends") + assert backends.status_code == 200 + payload = backends.json() + names = {backend["name"] for backend in payload["backends"]} + assert {"placeholder", "sofa"} <= names + + placeholder_health = client.get("/api/backends/placeholder/health") + assert placeholder_health.status_code == 200 + assert placeholder_health.json()["status"] == "available" + + sofa_capabilities = client.get("/api/backends/sofa/capabilities") + assert sofa_capabilities.status_code == 200 + assert sofa_capabilities.json()["real_physics"] is False + + sofa_preflight = client.get("/api/sofa/preflight") + assert sofa_preflight.status_code == 200 + assert sofa_preflight.json()["scene_plan_valid"] is True + + missing = client.get("/api/backends/missing/health") + assert missing.status_code == 404 + + def test_metadata_endpoint_is_honest_about_backends(client: TestClient) -> None: response = client.get("/api/metadata") assert response.status_code == 200 @@ -80,6 +111,45 @@ def test_scene_preview_endpoint(client: TestClient) -> None: assert "scene_plan" in data["sofa_preview"] +def 
test_config_validation_endpoint(client: TestClient) -> None: + response = client.post( + "/api/scenarios/validate", + json={ + "kind": "scenario", + "config": { + "schema_version": "0.1", + "scenario_id": "unsafe", + "use_mode": "patient_care", + "perturbations": [], + }, + }, + ) + assert response.status_code == 200 + payload = response.json() + assert payload["valid"] is False + assert any(issue["code"] == "prohibited_use" for issue in payload["issues"]) + + +def test_api_artifact_path_traversal_is_rejected(client: TestClient) -> None: + response = client.post("/api/dataset/index", json={"runs": "../outside"}) + + assert response.status_code == 400 + assert "escapes" in response.json()["detail"] + + +def test_optional_api_key_auth(monkeypatch) -> None: + monkeypatch.setenv("MEDSIM_API_KEY", "secret") + app = create_app( + scene_path=Path("configs/base_scene.yaml"), + scenario_dir=Path("configs/scenarios"), + ) + keyed_client = TestClient(app) + + assert keyed_client.get("/api/health").status_code == 200 + assert keyed_client.get("/api/metadata").status_code == 401 + assert keyed_client.get("/api/metadata", headers={"x-api-key": "secret"}).status_code == 200 + + def test_create_run_auto_completes_and_exposes_artifacts(client: TestClient) -> None: compiled = client.post( "/api/prompt/compile",