Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ class Cluster:
symbol_or_prefix: str
sample_event: NormalisedEvent
auto_sample: dict | None = None
auto_samples: list[dict] | None = None


@dataclass
Expand Down Expand Up @@ -476,16 +477,33 @@ def format_cluster_detail(cluster: Cluster, events: list[NormalisedEvent]) -> st
)
if event.raw_message:
lines.append(f" msg: {event.raw_message[:120]}")
if cluster.auto_sample:
stack = cluster.auto_sample.get("stack")
if stack:
lines.append("Auto-sample stack (top 10):")
lines.extend(f" {frame}" for frame in stack[:10])
else:
lines.append(f"Auto-sample unavailable: {cluster.auto_sample.get('reason', 'unknown')}")
for sample in _iter_auto_samples(cluster):
lines.extend(_format_auto_sample(sample))
return "\n".join(lines)


def _iter_auto_samples(cluster: Cluster) -> list[dict]:
    """Return the auto-samples that should be rendered for ``cluster``.

    A non-empty ``auto_samples`` list (the multi-kind field) is returned
    as-is. Otherwise a truthy legacy ``auto_sample`` dict, as found in old
    summaries, is wrapped in a one-element list. With neither present the
    result is an empty list.
    """
    if not cluster.auto_samples:
        # Old summaries only carry the single-sample field.
        legacy = cluster.auto_sample
        return [legacy] if legacy else []
    return cluster.auto_samples


def _format_auto_sample(sample: dict) -> list[str]:
    """Render one auto-sample block: a header plus up to 10 stack lines, or a reason.

    Args:
        sample: Capture payload. Only the ``kind``, ``stack`` and ``reason``
            keys are read. ``stack`` is normally multi-line text; a sequence
            of frame strings is accepted as well so payloads that stored the
            stack as a list still render instead of raising.

    Returns:
        ``["<kind>: unavailable (<reason>)"]`` when there is no stack,
        otherwise a ``"<kind> stack (top 10):"`` header followed by the first
        10 non-blank stack lines.
    """
    kind = sample.get("kind") or "auto-sample"
    stack = sample.get("stack")
    if not stack:
        # ``reason`` may be present but None; fall back rather than print "None".
        reason = sample.get("reason") or "unknown"
        return [f"{kind}: unavailable ({reason})"]
    if isinstance(stack, str):
        candidates = stack.splitlines()
    else:
        # Presumably a pre-split list of frames from an older payload format.
        candidates = [str(frame) for frame in stack]
    # Keep only the first 10 non-empty lines so the cluster detail stays bounded.
    head = [line for line in candidates if line.strip()][:10]
    return [f"{kind} stack (top 10):", *(f" {line}" for line in head)]


def format_diff(diff: dict) -> str:
"""Render a diff_sessions() result for human + agent consumption."""
if diff.get("version_mismatch"):
Expand Down Expand Up @@ -724,6 +742,7 @@ def _cluster_from_json(payload: dict) -> Cluster:
symbol_or_prefix=payload["symbol_or_prefix"],
sample_event=sample,
auto_sample=payload.get("auto_sample"),
auto_samples=payload.get("auto_samples"),
)


Expand Down Expand Up @@ -763,9 +782,24 @@ class SummaryBuilder:
dropped_below_threshold: int = 0
extras: dict = field(default_factory=dict)

def build(self, events: list[NormalisedEvent], top_n: int | None = None) -> SessionSummary:
"""Cluster, aggregate, rank, and emit a SessionSummary."""
def build(
self,
events: list[NormalisedEvent],
top_n: int | None = None,
auto_samples_by_fp: dict[str, list[dict]] | None = None,
) -> SessionSummary:
"""Cluster, aggregate, rank, and emit a SessionSummary.

``auto_samples_by_fp`` attaches per-fingerprint stack captures (from
``--auto-sample`` / ``--auto-spindump``) onto the matching clusters so
they survive into ``summary.json``.
"""
clusters = rank_clusters(cluster_events(events), top_n=top_n)
if auto_samples_by_fp:
for cluster in clusters:
samples = auto_samples_by_fp.get(cluster.fingerprint)
if samples:
cluster.auto_samples = samples
aggregates = {
"bursts": detect_temporal_bursts(events),
"quiet_periods": detect_quiet_periods(events),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,12 +221,17 @@ def stash_auto_sample(self, session_id: str, fingerprint: str, sample: dict) ->
handle.flush()
os.fsync(handle.fileno())

def read_auto_samples(self, session_id: str) -> dict[str, dict]:
"""Return ``{fingerprint: sample}`` with last-write-wins per fingerprint."""
def read_auto_samples(self, session_id: str) -> dict[str, list[dict]]:
"""Return ``{fingerprint: [sample, ...]}`` preserving write order.

Multiple capture mechanisms (e.g. ``--auto-sample`` + ``--auto-spindump``)
can stash distinct records under one fingerprint; callers disambiguate
via the ``kind`` field on each sample payload.
"""
path = self._auto_samples_path(session_id)
if not path.exists():
return {}
samples: dict[str, dict] = {}
samples: dict[str, list[dict]] = {}
with open(path) as handle:
for raw in handle:
line = raw.strip()
Expand All @@ -239,7 +244,7 @@ def read_auto_samples(self, session_id: str) -> dict[str, dict]:
fingerprint = payload.get("fingerprint")
if fingerprint is None:
continue
samples[fingerprint] = payload.get("sample")
samples.setdefault(fingerprint, []).append(payload.get("sample"))
return samples

def read_events(self, session_id: str) -> list:
Expand Down Expand Up @@ -395,7 +400,11 @@ def build_summary(
dropped_below_threshold=dropped_below_threshold,
extras=extras or {},
)
return builder.build(events, top_n=top_n)
return builder.build(
events,
top_n=top_n,
auto_samples_by_fp=self.read_auto_samples(session_id),
)

# === PRIVATE ===

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -605,9 +605,10 @@ def get_details(
e for e in self.store.read_events(session_id) if e.fingerprint == target.fingerprint
]
if resample:
target.auto_sample = _attempt_auto_sample(
fresh = _attempt_auto_sample(
meta.args.get("udid", ""), events[0].pid if events else 0
)
target.auto_samples = [fresh]
if json_mode:
from common.hang_pipeline import cluster_to_json

Expand Down Expand Up @@ -687,6 +688,7 @@ def run_worker(self, session_id: str) -> int:
bundle_id = args.get("bundle_id")
predicate_override = args.get("predicate")
auto_sample = bool(args.get("auto_sample", False))
auto_spindump = bool(args.get("auto_spindump", False))
udid = args["udid"]
predicate = _resolve_predicate(predicate_override)
max_restarts = env_int("IOS_SIM_HANG_MAX_RESTARTS", DEFAULT_MAX_STREAM_RESTARTS)
Expand All @@ -696,6 +698,7 @@ def run_worker(self, session_id: str) -> int:
events_path = self.store.events_path(session_id)
counters = {"total": 0, "matched": 0, "dropped": 0, "stream_restarts": 0}
sampled_fingerprints: set[str] = set()
spindumped_fingerprints: set[str] = set()
stop_flag = {"value": False}
cap_state = {"hit": False} # set by raw reader when size cap exceeded

Expand Down Expand Up @@ -773,7 +776,9 @@ def _spawn_log_stream() -> subprocess.Popen:
bundle_id=bundle_id,
min_hang_ms=min_hang_ms,
auto_sample=auto_sample,
auto_spindump=auto_spindump,
sampled_fingerprints=sampled_fingerprints,
spindumped_fingerprints=spindumped_fingerprints,
session_id=session_id,
session_start_ms=meta.started_at_ms,
udid=udid,
Expand Down Expand Up @@ -917,7 +922,9 @@ def _read_stream_into_events(
bundle_id: str | None,
min_hang_ms: int,
auto_sample: bool,
auto_spindump: bool,
sampled_fingerprints: set[str],
spindumped_fingerprints: set[str],
session_id: str,
session_start_ms: int,
udid: str,
Expand Down Expand Up @@ -976,6 +983,11 @@ def _read_stream_into_events(
self._stash_auto_sample(
session_id, normalised, _attempt_auto_sample(udid, normalised.pid)
)
if auto_spindump and normalised.fingerprint not in spindumped_fingerprints:
spindumped_fingerprints.add(normalised.fingerprint)
self._stash_auto_sample(
session_id, normalised, _attempt_auto_spindump(udid, normalised.pid)
)
out_handle.write(event_to_jsonl(normalised) + "\n")
return proc.poll()

Expand Down Expand Up @@ -1018,6 +1030,8 @@ def _stash_auto_sample(self, session_id: str, normalised, sample: dict) -> None:

# Duration/timeout for the `sample` capture path. Their use is not visible
# here — presumably they mirror the spindump pair below; confirm in
# `_attempt_auto_sample`.
SAMPLE_DURATION_SECONDS = 1
SAMPLE_TIMEOUT_SECONDS = 5
# Passed to `spindump` as its duration argument.
SPINDUMP_DURATION_SECONDS = 1
# Subprocess timeout for the spindump invocation; exceeding it is reported
# as reason "timeout".
SPINDUMP_TIMEOUT_SECONDS = 10


def _attempt_auto_sample(udid: str, pid: int) -> dict:
Expand Down Expand Up @@ -1098,6 +1112,82 @@ def _attempt_auto_sample(udid: str, pid: int) -> dict:
}


def _attempt_auto_spindump(udid: str, pid: int) -> dict:
    """Capture a hang report via ``xcrun simctl spawn <udid> spindump``.

    Runs spindump against ``pid`` for ``SPINDUMP_DURATION_SECONDS`` with a
    subprocess timeout of ``SPINDUMP_TIMEOUT_SECONDS``.

    Returns:
        A dict with keys ``kind`` (always ``"spindump"``), ``stack`` (the
        command's stdout on success, else ``None``), ``captured_at_ms``
        (wall-clock time taken at function entry), ``symbolicated`` (always
        ``False``) and ``reason`` (``None`` on success, else a short failure
        description). A missing udid or pid, a timeout, a missing ``xcrun``,
        a non-zero exit, and blank output are all reported through ``reason``
        rather than raised.
    """
    captured_at_ms = int(time.time() * 1000)

    def _outcome(stack, reason):
        # Every exit path shares this shape; only stack and reason vary.
        return {
            "kind": "spindump",
            "stack": stack,
            "captured_at_ms": captured_at_ms,
            "symbolicated": False,
            "reason": reason,
        }

    # Guard clauses: nothing to spawn without a simulator and a target process.
    if not udid:
        return _outcome(None, "no udid available")
    if not pid:
        return _outcome(None, "no pid available")

    # NOTE(review): relies on `-file -` routing the report to stdout —
    # confirm against the spindump man page.
    argv = ["xcrun", "simctl", "spawn", udid, "spindump"]
    argv += [str(pid), str(SPINDUMP_DURATION_SECONDS), "-file", "-"]
    try:
        completed = subprocess.run(
            argv,
            capture_output=True,
            text=True,
            timeout=SPINDUMP_TIMEOUT_SECONDS,
            check=False,
        )
    except subprocess.TimeoutExpired:
        return _outcome(None, "timeout")
    except FileNotFoundError:
        return _outcome(None, "xcrun not found")

    report = completed.stdout
    if completed.returncode == 0 and report.strip():
        return _outcome(report, None)
    # Prefer stderr as the explanation; cap it so the payload stays small.
    detail = completed.stderr.strip() or f"spindump exited {completed.returncode}"
    return _outcome(None, detail[:200])


# === CLI ===


Expand Down Expand Up @@ -1202,6 +1292,11 @@ def main():
action="store_true",
help="On hang, capture a main-thread stack via `xcrun simctl spawn <udid> sample`",
)
parser.add_argument(
"--auto-spindump",
action="store_true",
help="On hang, capture a spindump report via `xcrun simctl spawn <udid> spindump`",
)
parser.add_argument("--top", type=int, dest="top_n", help="Top-N clusters to retain in summary")
parser.add_argument(
"--all", action="store_true", dest="all_clusters", help="Keep all clusters (no top-N cap)"
Expand Down Expand Up @@ -1265,6 +1360,7 @@ def main():
"bundle_id": args.bundle_id,
"predicate": args.predicate,
"auto_sample": args.auto_sample,
"auto_spindump": args.auto_spindump,
"raw_capture": args.raw_capture,
"max_size_mb": args.max_size_mb,
"no_gzip": args.no_gzip,
Expand Down
57 changes: 57 additions & 0 deletions tests/test_auto_sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,3 +113,60 @@ def _fake_run(*_a, **_kw):

assert result["stack"] is None
assert result["reason"] == "xcrun not found"


# === spindump ===


def test_spindump_returns_stack_on_success(monkeypatch):
    """A zero-exit run with output yields that output as the stack, with no reason."""
    seen_cmds: list = []

    def _recording_run(cmd, **_kw):
        # Remember the argv so the command line can be checked afterwards.
        seen_cmds.append(cmd)
        return _FakeCompleted(stdout="Process: Foo [1234]\nThread 0x1 main\n foo()\n")

    monkeypatch.setattr(subprocess, "run", _recording_run)
    outcome = hang_watcher._attempt_auto_spindump("ABC-123", 1234)

    assert outcome["kind"] == "spindump"
    assert "Process: Foo" in outcome["stack"]
    assert outcome["reason"] is None

    argv = seen_cmds[-1]
    assert argv[:5] == ["xcrun", "simctl", "spawn", "ABC-123", "spindump"]
    assert "1234" in argv


def test_spindump_missing_udid_short_circuits(monkeypatch):
    """An empty udid is rejected before any subprocess is launched."""
    invocations: list = []

    def _counting_run(*_a, **_kw):
        # Any call here means the guard clause failed to short-circuit.
        invocations.append(1)
        return _FakeCompleted()

    monkeypatch.setattr(subprocess, "run", _counting_run)
    outcome = hang_watcher._attempt_auto_spindump("", 1234)

    assert outcome["stack"] is None
    assert outcome["reason"] == "no udid available"
    assert len(invocations) == 0


def test_spindump_timeout_returns_reason(monkeypatch):
    """A subprocess timeout is reported as reason ``"timeout"`` with no stack."""

    def _timing_out_run(cmd, **_kw):
        raise subprocess.TimeoutExpired(cmd=cmd, timeout=10)

    monkeypatch.setattr(subprocess, "run", _timing_out_run)
    outcome = hang_watcher._attempt_auto_spindump("ABC-123", 1234)

    assert outcome["kind"] == "spindump"
    assert outcome["stack"] is None
    assert outcome["reason"] == "timeout"


def test_spindump_nonzero_exit_returns_reason(monkeypatch):
    """A non-zero exit surfaces the command's stderr text in ``reason``."""
    failed = dict(stdout="", stderr="No such process\n", returncode=1)

    def _failing_run(*_a, **_kw):
        return _FakeCompleted(**failed)

    monkeypatch.setattr(subprocess, "run", _failing_run)
    outcome = hang_watcher._attempt_auto_spindump("ABC-123", 1234)

    assert outcome["stack"] is None
    assert "No such process" in outcome["reason"]
Loading
Loading