conorluddy · conorluddy · May 25, 2026 · May 25, 2026
diff --git a/ios-simulator-skill/skills/ios-simulator-skill/scripts/common/hang_pipeline.py b/ios-simulator-skill/skills/ios-simulator-skill/scripts/common/hang_pipeline.py
@@ -114,6 +114,7 @@ class Cluster:
     symbol_or_prefix: str
     sample_event: NormalisedEvent
     auto_sample: dict | None = None
+    auto_samples: list[dict] | None = None
 
 
 @dataclass
@@ -476,16 +477,33 @@ def format_cluster_detail(cluster: Cluster, events: list[NormalisedEvent]) -> st
         )
         if event.raw_message:
             lines.append(f"      msg: {event.raw_message[:120]}")
-    if cluster.auto_sample:
-        stack = cluster.auto_sample.get("stack")
-        if stack:
-            lines.append("Auto-sample stack (top 10):")
-            lines.extend(f"  {frame}" for frame in stack[:10])
-        else:
-            lines.append(f"Auto-sample unavailable: {cluster.auto_sample.get('reason', 'unknown')}")
+    for sample in _iter_auto_samples(cluster):
+        lines.extend(_format_auto_sample(sample))
     return "\n".join(lines)
 
 
+def _iter_auto_samples(cluster: Cluster) -> list[dict]:
+    """Return a cluster's auto-samples as a list, preferring ``auto_samples`` and
+    falling back to the legacy single ``auto_sample`` field for old summaries."""
+    if cluster.auto_samples:
+        return cluster.auto_samples  # the stored list itself, not a copy
+    if cluster.auto_sample:
+        return [cluster.auto_sample]
+    return []
+
+
+def _format_auto_sample(sample: dict) -> list[str]:
+    """Render one auto-sample as lines: a header plus up to 10 stack lines, or a reason."""
+    kind = sample.get("kind") or "auto-sample"
+    stack = sample.get("stack")
+    if not stack:
+        return [f"{kind}: unavailable ({sample.get('reason', 'unknown')})"]
+    # Text from `sample`/`spindump`, or a legacy frame list; cap at 10 non-empty lines.
+    frames = stack.splitlines() if isinstance(stack, str) else [str(f) for f in stack]
+    head = [frame for frame in frames if frame.strip()][:10]
+    return [f"{kind} stack (top 10):", *(f"  {frame}" for frame in head)]
+
+
 def format_diff(diff: dict) -> str:
     """Render a diff_sessions() result for human + agent consumption."""
     if diff.get("version_mismatch"):
@@ -724,6 +742,7 @@ def _cluster_from_json(payload: dict) -> Cluster:
         symbol_or_prefix=payload["symbol_or_prefix"],
         sample_event=sample,
         auto_sample=payload.get("auto_sample"),
+        auto_samples=payload.get("auto_samples"),
     )
 
 
@@ -763,9 +782,24 @@ class SummaryBuilder:
     dropped_below_threshold: int = 0
     extras: dict = field(default_factory=dict)
 
-    def build(self, events: list[NormalisedEvent], top_n: int | None = None) -> SessionSummary:
-        """Cluster, aggregate, rank, and emit a SessionSummary."""
+    def build(
+        self,
+        events: list[NormalisedEvent],
+        top_n: int | None = None,
+        auto_samples_by_fp: dict[str, list[dict]] | None = None,
+    ) -> SessionSummary:
+        """Cluster, aggregate, rank, and emit a SessionSummary.
+
+        ``auto_samples_by_fp`` attaches per-fingerprint stack captures (from
+        ``--auto-sample`` / ``--auto-spindump``) onto the matching clusters so
+        they survive into ``summary.json``.
+        """
         clusters = rank_clusters(cluster_events(events), top_n=top_n)
+        if auto_samples_by_fp:
+            for cluster in clusters:
+                samples = auto_samples_by_fp.get(cluster.fingerprint)
+                if samples:
+                    cluster.auto_samples = samples
         aggregates = {
             "bursts": detect_temporal_bursts(events),
             "quiet_periods": detect_quiet_periods(events),

diff --git a/ios-simulator-skill/skills/ios-simulator-skill/scripts/common/hang_sessions.py b/ios-simulator-skill/skills/ios-simulator-skill/scripts/common/hang_sessions.py
@@ -221,12 +221,17 @@ def stash_auto_sample(self, session_id: str, fingerprint: str, sample: dict) ->
             handle.flush()
             os.fsync(handle.fileno())
 
-    def read_auto_samples(self, session_id: str) -> dict[str, dict]:
-        """Return ``{fingerprint: sample}`` with last-write-wins per fingerprint."""
+    def read_auto_samples(self, session_id: str) -> dict[str, list[dict]]:
+        """Return ``{fingerprint: [sample, ...]}`` preserving write order.
+
+        Multiple capture mechanisms (e.g. ``--auto-sample`` + ``--auto-spindump``)
+        can stash distinct records under one fingerprint; callers disambiguate
+        via the ``kind`` field on each sample payload.
+        """
         path = self._auto_samples_path(session_id)
         if not path.exists():
             return {}
-        samples: dict[str, dict] = {}
+        samples: dict[str, list[dict]] = {}
         with open(path) as handle:
             for raw in handle:
                 line = raw.strip()
@@ -239,7 +244,7 @@ def read_auto_samples(self, session_id: str) -> dict[str, dict]:
                 fingerprint = payload.get("fingerprint")
                 if fingerprint is None:
                     continue
-                samples[fingerprint] = payload.get("sample")
+                samples.setdefault(fingerprint, []).append(payload.get("sample"))
         return samples
 
     def read_events(self, session_id: str) -> list:
@@ -395,7 +400,11 @@ def build_summary(
             dropped_below_threshold=dropped_below_threshold,
             extras=extras or {},
         )
-        return builder.build(events, top_n=top_n)
+        return builder.build(
+            events,
+            top_n=top_n,
+            auto_samples_by_fp=self.read_auto_samples(session_id),
+        )
 
     # === PRIVATE ===
 

diff --git a/ios-simulator-skill/skills/ios-simulator-skill/scripts/hang_watcher.py b/ios-simulator-skill/skills/ios-simulator-skill/scripts/hang_watcher.py
@@ -605,9 +605,10 @@ def get_details(
                 e for e in self.store.read_events(session_id) if e.fingerprint == target.fingerprint
             ]
             if resample:
-                target.auto_sample = _attempt_auto_sample(
+                fresh = _attempt_auto_sample(
                     meta.args.get("udid", ""), events[0].pid if events else 0
                 )
+                target.auto_samples = [fresh]
             if json_mode:
                 from common.hang_pipeline import cluster_to_json
 
@@ -687,6 +688,7 @@ def run_worker(self, session_id: str) -> int:
         bundle_id = args.get("bundle_id")
         predicate_override = args.get("predicate")
         auto_sample = bool(args.get("auto_sample", False))
+        auto_spindump = bool(args.get("auto_spindump", False))
         udid = args["udid"]
         predicate = _resolve_predicate(predicate_override)
         max_restarts = env_int("IOS_SIM_HANG_MAX_RESTARTS", DEFAULT_MAX_STREAM_RESTARTS)
@@ -696,6 +698,7 @@ def run_worker(self, session_id: str) -> int:
         events_path = self.store.events_path(session_id)
         counters = {"total": 0, "matched": 0, "dropped": 0, "stream_restarts": 0}
         sampled_fingerprints: set[str] = set()
+        spindumped_fingerprints: set[str] = set()
         stop_flag = {"value": False}
         cap_state = {"hit": False}  # set by raw reader when size cap exceeded
 
@@ -773,7 +776,9 @@ def _spawn_log_stream() -> subprocess.Popen:
                             bundle_id=bundle_id,
                             min_hang_ms=min_hang_ms,
                             auto_sample=auto_sample,
+                            auto_spindump=auto_spindump,
                             sampled_fingerprints=sampled_fingerprints,
+                            spindumped_fingerprints=spindumped_fingerprints,
                             session_id=session_id,
                             session_start_ms=meta.started_at_ms,
                             udid=udid,
@@ -917,7 +922,9 @@ def _read_stream_into_events(
         bundle_id: str | None,
         min_hang_ms: int,
         auto_sample: bool,
+        auto_spindump: bool,
         sampled_fingerprints: set[str],
+        spindumped_fingerprints: set[str],
         session_id: str,
         session_start_ms: int,
         udid: str,
@@ -976,6 +983,11 @@ def _read_stream_into_events(
                 self._stash_auto_sample(
                     session_id, normalised, _attempt_auto_sample(udid, normalised.pid)
                 )
+            if auto_spindump and normalised.fingerprint not in spindumped_fingerprints:
+                spindumped_fingerprints.add(normalised.fingerprint)
+                self._stash_auto_sample(
+                    session_id, normalised, _attempt_auto_spindump(udid, normalised.pid)
+                )
             out_handle.write(event_to_jsonl(normalised) + "\n")
         return proc.poll()
 
@@ -1018,6 +1030,8 @@ def _stash_auto_sample(self, session_id: str, normalised, sample: dict) -> None:
 
 SAMPLE_DURATION_SECONDS = 1
 SAMPLE_TIMEOUT_SECONDS = 5
+SPINDUMP_DURATION_SECONDS = 1
+SPINDUMP_TIMEOUT_SECONDS = 10
 
 
 def _attempt_auto_sample(udid: str, pid: int) -> dict:
@@ -1098,6 +1112,82 @@ def _attempt_auto_sample(udid: str, pid: int) -> dict:
     }
 
 
+def _attempt_auto_spindump(udid: str, pid: int) -> dict:
+    """Capture a hang report via ``xcrun simctl spawn <udid> spindump``.
+
+    ``spindump`` is Apple's own hang-report tool, aimed at the "what was main thread
+    doing" question; heavier than ``sample``, hence the longer timeout. Returns a
+    dict whose ``stack`` is the report text, or ``None`` with ``reason`` saying why.
+    """
+    captured_at_ms = int(time.time() * 1000)
+    if not udid:
+        return {
+            "kind": "spindump",
+            "stack": None,
+            "captured_at_ms": captured_at_ms,
+            "symbolicated": False,
+            "reason": "no udid available",
+        }
+    if not pid:  # 0 or None: nothing to target
+        return {
+            "kind": "spindump",
+            "stack": None,
+            "captured_at_ms": captured_at_ms,
+            "symbolicated": False,
+            "reason": "no pid available",
+        }
+    cmd = [
+        "xcrun",
+        "simctl",
+        "spawn",
+        udid,
+        "spindump",
+        str(pid),
+        str(SPINDUMP_DURATION_SECONDS),
+        "-file",
+        "-",  # presumably routes the report to stdout, which is read below — TODO confirm
+    ]
+    try:
+        result = subprocess.run(
+            cmd,
+            capture_output=True,
+            text=True,
+            timeout=SPINDUMP_TIMEOUT_SECONDS,
+            check=False,  # non-zero exits are reported through ``reason`` below
+        )
+    except subprocess.TimeoutExpired:
+        return {
+            "kind": "spindump",
+            "stack": None,
+            "captured_at_ms": captured_at_ms,
+            "symbolicated": False,
+            "reason": "timeout",
+        }
+    except FileNotFoundError:
+        return {
+            "kind": "spindump",
+            "stack": None,
+            "captured_at_ms": captured_at_ms,
+            "symbolicated": False,
+            "reason": "xcrun not found",
+        }
+    if result.returncode != 0 or not result.stdout.strip():
+        return {
+            "kind": "spindump",
+            "stack": None,
+            "captured_at_ms": captured_at_ms,
+            "symbolicated": False,
+            "reason": (result.stderr.strip() or f"spindump exited {result.returncode}")[:200],
+        }
+    return {
+        "kind": "spindump",
+        "stack": result.stdout,
+        "captured_at_ms": captured_at_ms,
+        "symbolicated": False,
+        "reason": None,
+    }
+
+
 # === CLI ===
 
 
@@ -1202,6 +1292,11 @@ def main():
         action="store_true",
         help="On hang, capture a main-thread stack via `xcrun simctl spawn <udid> sample`",
     )
+    parser.add_argument(
+        "--auto-spindump",
+        action="store_true",
+        help="On hang, capture a spindump report via `xcrun simctl spawn <udid> spindump`",
+    )
     parser.add_argument("--top", type=int, dest="top_n", help="Top-N clusters to retain in summary")
     parser.add_argument(
         "--all", action="store_true", dest="all_clusters", help="Keep all clusters (no top-N cap)"
@@ -1265,6 +1360,7 @@ def main():
             "bundle_id": args.bundle_id,
             "predicate": args.predicate,
             "auto_sample": args.auto_sample,
+            "auto_spindump": args.auto_spindump,
             "raw_capture": args.raw_capture,
             "max_size_mb": args.max_size_mb,
             "no_gzip": args.no_gzip,

diff --git a/tests/test_auto_sample.py b/tests/test_auto_sample.py
@@ -113,3 +113,60 @@ def _fake_run(*_a, **_kw):
 
     assert result["stack"] is None
     assert result["reason"] == "xcrun not found"
+
+
+# === spindump ===
+
+
+def test_spindump_returns_stack_on_success(monkeypatch):
+    captured: dict = {}
+
+    def _fake_run(cmd, **_kw):
+        captured["cmd"] = cmd  # record argv for the assertions below
+        return _FakeCompleted(stdout="Process: Foo [1234]\nThread 0x1 main\n  foo()\n")
+
+    monkeypatch.setattr(subprocess, "run", _fake_run)
+    result = hang_watcher._attempt_auto_spindump("ABC-123", 1234)
+
+    assert result["kind"] == "spindump"
+    assert "Process: Foo" in result["stack"]
+    assert result["reason"] is None
+    assert captured["cmd"][:5] == ["xcrun", "simctl", "spawn", "ABC-123", "spindump"]
+    assert "1234" in captured["cmd"]  # pid is passed as a string argument
+
+
+def test_spindump_missing_udid_short_circuits(monkeypatch):
+    called = {"n": 0}  # counts subprocess.run invocations
+    monkeypatch.setattr(
+        subprocess,
+        "run",
+        lambda *_a, **_kw: (called.__setitem__("n", called["n"] + 1) or _FakeCompleted()),
+    )
+    result = hang_watcher._attempt_auto_spindump("", 1234)
+
+    assert result["stack"] is None
+    assert result["reason"] == "no udid available"
+    assert called["n"] == 0  # no subprocess may be spawned without a udid
+
+
+def test_spindump_timeout_returns_reason(monkeypatch):
+    def _fake_run(cmd, **_kw):
+        raise subprocess.TimeoutExpired(cmd=cmd, timeout=10)  # simulate a hung spindump
+
+    monkeypatch.setattr(subprocess, "run", _fake_run)
+    result = hang_watcher._attempt_auto_spindump("ABC-123", 1234)
+
+    assert result["stack"] is None
+    assert result["reason"] == "timeout"
+    assert result["kind"] == "spindump"
+
+
+def test_spindump_nonzero_exit_returns_reason(monkeypatch):
+    def _fake_run(*_a, **_kw):
+        return _FakeCompleted(stdout="", stderr="No such process\n", returncode=1)
+
+    monkeypatch.setattr(subprocess, "run", _fake_run)
+    result = hang_watcher._attempt_auto_spindump("ABC-123", 1234)
+
+    assert result["stack"] is None
+    assert "No such process" in result["reason"]  # stderr is surfaced as the reason