Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hyperactor_mesh/src/host_mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -287,7 +287,7 @@ impl HostMesh {
mesh_id: mesh_id_hash,
rank: rank as u64,
full_name: actor.actor_id().to_string(),
display_name: None,
display_name: Some(format!("Host Unit Rank {rank}")),
});
}
}
Expand Down
3 changes: 1 addition & 2 deletions hyperactor_mesh/src/proc_mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,14 +244,13 @@ impl ProcMesh {
let now = std::time::SystemTime::now();
for rank in current_ref.ranks.iter() {
let actor_id = rank.agent.actor_id();

hyperactor_telemetry::notify_actor_created(hyperactor_telemetry::ActorEvent {
id: hyperactor_telemetry::hash_to_u64(actor_id),
timestamp: now,
mesh_id: mesh_id_hash,
rank: rank.create_rank as u64,
full_name: actor_id.to_string(),
display_name: None,
display_name: Some(format!("Proc Unit Rank {}", rank.create_rank)),
});
}
}
Expand Down
4 changes: 2 additions & 2 deletions monarch_hyperactor/src/host_mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ fn bootstrap_host(bootstrap_cmd: Option<PyBootstrapCommand>) -> PyResult<PyPytho
mesh_id: host_mesh_id,
rank: 0,
full_name: host_agent_id.to_string(),
display_name: None,
display_name: Some("Host Unit Rank 0".into()),
});

let proc_name_str = proc_mesh.name().to_string();
Expand All @@ -444,7 +444,7 @@ fn bootstrap_host(bootstrap_cmd: Option<PyBootstrapCommand>) -> PyResult<PyPytho
mesh_id: proc_mesh_id,
rank: 0,
full_name: proc_agent_id.to_string(),
display_name: None,
display_name: Some("Proc Unit Rank 0".into()),
});

let client_mesh_name = format!("{}/client", proc_mesh.name());
Expand Down
3 changes: 3 additions & 0 deletions python/monarch/monarch_dashboard/fake_data/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,7 @@ def _generate_hierarchy() -> tuple[
"mesh_id": host_mesh_id,
"rank": 0,
"full_name": hma_full,
"display_name": f"Host Unit Rank {h_idx}",
}
)
actor_to_host_mesh[hma_id] = host_mesh_id
Expand Down Expand Up @@ -310,6 +311,7 @@ def _generate_hierarchy() -> tuple[
"mesh_id": proc_mesh_id,
"rank": 0,
"full_name": pma_full,
"display_name": f"Proc Unit Rank {pm_idx}",
}
)
actor_to_host_mesh[pma_id] = host_mesh_id
Expand Down Expand Up @@ -357,6 +359,7 @@ def _generate_hierarchy() -> tuple[
"mesh_id": actor_mesh_id,
"rank": rank,
"full_name": actor_full,
"display_name": None,
}
)
actor_to_host_mesh[aid] = host_mesh_id
Expand Down
3 changes: 3 additions & 0 deletions python/monarch/monarch_dashboard/fake_data/simulate.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ def _build_hierarchy() -> dict:
"mesh_id": host_mesh_id,
"rank": 0,
"full_name": f"{host_full}/HostAgent[0]",
"display_name": f"Host Unit Rank {h_idx}",
}
)
# 2 proc meshes per host mesh.
Expand Down Expand Up @@ -130,6 +131,7 @@ def _build_hierarchy() -> dict:
"mesh_id": proc_mesh_id,
"rank": 0,
"full_name": f"{pm_full}/ProcAgent[0]",
"display_name": f"Proc Unit Rank {pm_idx}",
}
)
# 1 actor mesh per proc mesh.
Expand Down Expand Up @@ -159,6 +161,7 @@ def _build_hierarchy() -> dict:
"mesh_id": actor_mesh_id,
"rank": 0,
"full_name": f"{am_full}/{actor_type}[0]",
"display_name": None,
}
)
if actor_trigger_id is None:
Expand Down
4 changes: 2 additions & 2 deletions python/monarch/monarch_dashboard/frontend/src/App.css
Original file line number Diff line number Diff line change
Expand Up @@ -828,14 +828,14 @@ button:focus:not(:focus-visible) {

.dag-tooltip {
position: absolute;
transform: translate(-50%, -100%);
transform: translate(-50%, 0);
pointer-events: none;
background: var(--bg-tertiary);
border: 1px solid var(--border-subtle);
border-radius: var(--radius-sm);
padding: 6px 10px;
white-space: nowrap;
z-index: 20;
z-index: 1000;
box-shadow: 0 4px 16px rgba(0, 0, 0, 0.5);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,9 @@ export function DagNodeComponent({
const isRect = RECT_TIERS.has(node.tier);
const isSmallNode = node.tier === "actor" || node.tier === "actor_mesh";

// Truncate label for small nodes.
const maxChars = isSmallNode ? 12 : 14;
const maxLen = isSmallNode ? 14 : 20;
const displayLabel =
node.label.length > maxChars
? node.label.slice(0, maxChars - 1) + "\u2026"
: node.label;
node.label.length > maxLen ? node.label.slice(0, maxLen - 1) + "\u2026" : node.label;

// Rectangle dimensions for mesh tiers.
const w = r * 2.2;
Expand Down Expand Up @@ -183,6 +180,7 @@ export function DagNodeComponent({
{node.subtitle}
</text>
)}

</g>
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ export function DagView() {

const handleNodeHover = useCallback((node: DagNode | null) => {
if (!node) { setTooltip(null); return; }
setTooltip({ node, x: node.x, y: node.y - node.radius - 14 });
setTooltip({ node, x: node.x, y: node.y + node.radius + 10 });
}, []);

if (loading) return <div className="loading-state">Loading DAG data...</div>;
Expand Down Expand Up @@ -159,12 +159,6 @@ export function DagView() {
))}
</g>
</svg>
{tooltip && !selectedNode && (
<div className="dag-tooltip" style={{ left: `${((tooltip.x - viewBox.x) / viewBox.w) * 100}%`, top: `${((tooltip.y - viewBox.y) / viewBox.h) * 100}%` }}>
<div className="dag-tooltip-name">{tooltip.node.label}</div>
<div className="dag-tooltip-info">{tooltip.node.subtitle} &middot; {tooltip.node.status}</div>
</div>
)}
<DagLegend />
</div>
{selectedNode && <NodeDetail node={selectedNode} onClose={() => setSelectedNode(null)} />}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ function ActorErrorGroup({
{actors.map((a) => (
<div key={a.actor_id} className="summary-error-item">
<div className="summary-error-name">
{a.full_name.split("/").pop()}
{a.display_name ?? a.full_name.split("/").pop()}
</div>
<div className="summary-error-detail">
<span className="summary-error-reason-wrap">
Expand Down Expand Up @@ -237,8 +237,8 @@ function TimelineBar({
const duration = timeline.end_us - timeline.start_us;

// Collect error events with their position on the timeline.
const toNotch = (a: { full_name: string; timestamp_us: number }, status: string) => {
const name = a.full_name.split("/").pop() ?? "actor";
const toNotch = (a: { full_name: string; display_name?: string | null; timestamp_us: number }, status: string) => {
const name = a.display_name ?? a.full_name.split("/").pop() ?? "actor";
return {
pct: ((a.timestamp_us - timeline.start_us) / duration) * 100,
status,
Expand Down
2 changes: 2 additions & 0 deletions python/monarch/monarch_dashboard/frontend/src/types/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,15 @@ export interface Summary {
failed_actors: Array<{
actor_id: EntityId;
full_name: string;
display_name?: string | null;
reason: string | null;
timestamp_us: number;
mesh_id: EntityId;
}>;
stopped_actors: Array<{
actor_id: EntityId;
full_name: string;
display_name?: string | null;
reason: string | null;
timestamp_us: number;
mesh_id: EntityId;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ export interface ApiDagNode {
label: string;
subtitle: string;
status: string;
rank?: number;
}

/** An edge from the /api/dag response. */
Expand Down Expand Up @@ -146,7 +147,11 @@ export function computeLayout(data: ApiDagData): DagGraph {
const node = nodeMap[id];
if (!node) return [];

const kids = children[id] ?? [];
const kids = (children[id] ?? []).slice().sort((a, b) => {
const ra = nodeMap[a]?.rank ?? 0;
const rb = nodeMap[b]?.rank ?? 0;
return ra - rb;
});
if (kids.length === 0) {
const x = nextX;
nextX += HORIZONTAL_SPACING;
Expand Down
9 changes: 6 additions & 3 deletions python/monarch/monarch_dashboard/server/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,10 @@ def _leaf_name(name: str) -> str:
"id": f"host_unit-{agent['id']}",
"entity_id": agent["id"],
"tier": "host_unit",
"label": _leaf_name(agent["full_name"]),
"label": agent.get("display_name") or _leaf_name(agent["full_name"]),
"subtitle": "Host",
"status": actor_statuses.get(agent["id"], "unknown"),
"rank": agent["rank"],
}
)

Expand All @@ -570,9 +571,10 @@ def _leaf_name(name: str) -> str:
"id": f"proc_unit-{agent['id']}",
"entity_id": agent["id"],
"tier": "proc_unit",
"label": _leaf_name(agent["full_name"]),
"label": agent.get("display_name") or _leaf_name(agent["full_name"]),
"subtitle": "Proc",
"status": actor_statuses.get(agent["id"], "unknown"),
"rank": agent["rank"],
}
)

Expand All @@ -597,6 +599,7 @@ def _leaf_name(name: str) -> str:
"label": _leaf_name(a["full_name"]),
"subtitle": f"rank {a['rank']}",
"status": actor_statuses.get(a["id"], "unknown"),
"rank": a["rank"],
}
)

Expand Down Expand Up @@ -778,7 +781,7 @@ def _count(sql: str) -> int:
# -- Error details --
# Use LOWER() so both fake data ("failed") and real telemetry ("Failed") match.
_error_actor_sql = (
"SELECT ase.actor_id, a.full_name, ase.reason, ase.timestamp_us, a.mesh_id"
"SELECT ase.actor_id, a.full_name, a.display_name, ase.reason, ase.timestamp_us, a.mesh_id"
" FROM actor_status_events ase"
" JOIN actors a ON ase.actor_id = a.id"
f" INNER JOIN ({_LATEST_ACTOR_STATUS_SQL}) latest"
Expand Down
1 change: 1 addition & 0 deletions python/monarch/monarch_dashboard/server/tests/test_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ def test_existing_actor(self):
"mesh_id",
"rank",
"full_name",
"display_name",
}
self.assertTrue(expected_keys.issubset(set(actor.keys())))

Expand Down
Loading