Skip to content

Commit 5398e1a

Browse files
authored
Add vLLM CI metrics (#7239)
This PR has two parts: the query `vllm/merges_percentage` and a new HUD page `metrics/vllm.tsx` to display vLLM CI metrics. There are 2 KPIs to start with: 1. The % of force merges with CI failures. Its meaning is clear. 2. The % of manual merges where a vLLM maintainer merged the pull request manually without using GitHub auto-merge, but there weren't any failures in the pull request at the time of the merge. ### What is a vLLM CI failure? As vLLM CI is on Buildkite, a CI failure means a failed Buildkite job that (1) is not a soft fail, and (2) fails on its latest retry at the time of the merge. We can get this information by joining the GitHub `pull_request` with Buildkite `vllm_buildkite_jobs` on the pull request number. ### Testing https://torchci-git-vllm-metrics-fbopensource.vercel.app/metrics/vllm cc @rzabarazesh @yeqcharlotte @simon-mo --------- Signed-off-by: Huy Do <[email protected]>
1 parent 44b32da commit 5398e1a

File tree

4 files changed

+356
-0
lines changed

4 files changed

+356
-0
lines changed
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"params": {
3+
"granularity": "String",
4+
"repo": "String",
5+
"startTime": "DateTime64(3)",
6+
"stopTime": "DateTime64(3)"
7+
},
8+
"tests": [
9+
{
10+
"granularity": "day",
11+
"repo": "vllm-project/vllm",
12+
"startTime": "2025-09-22T00:00:00.000",
13+
"stopTime": "2025-09-29T00:00:00.000"
14+
}
15+
]
16+
}
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
-- Per-bucket pull request counts for one repo: total, open, abandoned,
-- auto-merged, manually merged, and manually merged with failed Buildkite jobs.
--
-- Parameters:
--   granularity: unit passed to DATE_TRUNC when bucketing (e.g. 'day')
--   repo:        prefix matched against pull_request.dynamoKey
--   startTime:   inclusive lower bound on the PR's updated_at
--   stopTime:    exclusive upper bound on the PR's updated_at
--
-- NOTE(review): PRs are bucketed and filtered by updated_at, not by a merge
-- timestamp, so a merged PR lands in the bucket of its last update.
WITH prs AS (
    SELECT
        number,
        state,
        merged,
        merged_by,
        auto_merge,
        updated_at,
        formatDateTime(
            DATE_TRUNC(
                {granularity: String },
                parseDateTimeBestEffort(updated_at)
            ),
            '%Y-%m-%d'
        ) AS bucket
    FROM
        pull_request
    WHERE
        dynamoKey like concat({repo: String }, '%')
        AND parseDateTimeBestEffort(updated_at) >= {startTime: DateTime64(3) }
        AND parseDateTimeBestEffort(updated_at) < {stopTime: DateTime64(3) }
),
-- All PRs in the window, regardless of state
total_prs AS (
    SELECT
        bucket,
        count(number) AS total_count
    FROM
        prs
    GROUP BY
        bucket
),
open_prs AS (
    SELECT
        bucket,
        count(number) AS open_count
    FROM
        prs
    WHERE
        state = 'open'
    GROUP BY
        bucket
),
-- Closed without being merged
abandon_prs AS (
    SELECT
        bucket,
        count(number) AS abandon_count
    FROM
        prs
    WHERE
        state = 'closed'
        AND merged = 'false'
    GROUP BY
        bucket
),
merged_prs AS (
    SELECT
        *
    FROM
        prs
    WHERE
        state = 'closed'
        AND merged = 'true'
),
-- Non-soft-failed Buildkite jobs that belong to the merged PRs, ranked so
-- that the most recently created run of each (PR, job name) pair comes first
buildkite_jobs AS (
    SELECT
        tupleElement(vllm.vllm_buildkite_jobs.build, 'pull_request').id AS number,
        tupleElement(vllm.vllm_buildkite_jobs.job, 'name') AS job_name,
        tupleElement(vllm.vllm_buildkite_jobs.job, 'state') AS job_state,
        tupleElement(vllm.vllm_buildkite_jobs.job, 'created_at') AS job_created_at,
        -- Row 1 is the latest run of the job
        ROW_NUMBER() OVER (
            PARTITION BY number,
            job_name
            ORDER BY
                job_created_at DESC
        ) AS row_num
    FROM
        vllm.vllm_buildkite_jobs
    WHERE
        tupleElement(vllm.vllm_buildkite_jobs.build, 'pull_request').id IN (
            SELECT
                toString(number)
            FROM
                merged_prs
        )
        -- Don't care for soft_failed jobs
        AND tupleElement(vllm.vllm_buildkite_jobs.job, 'soft_failed') = 'false'
),
latest_buildkite_jobs AS (
    SELECT
        *
    FROM
        buildkite_jobs
    WHERE
        row_num = 1
),
-- Merged PRs with an empty auto_merge.merge_method, i.e. merged without
-- GitHub auto-merge.
-- NOTE(review): this count also includes the PRs counted in
-- manual_merged_prs_with_failures below; consumers that add the two columns
-- together will count those PRs twice. Confirm which semantics is intended.
manual_merged_prs AS (
    SELECT
        bucket,
        count(number) AS manual_merged_count
    FROM
        merged_prs
    WHERE
        tupleElement(auto_merge, 'merge_method') = ''
    GROUP BY
        bucket
),
-- Manually merged PRs whose latest run of at least one job failed
manual_merged_prs_with_failures AS (
    SELECT
        bucket,
        -- The join yields one row per failed job, so count each PR only once;
        -- a plain count() would count a PR once per failed job.
        count(DISTINCT merged_prs.number) AS manual_merged_with_failures_count
    FROM
        merged_prs
        LEFT JOIN latest_buildkite_jobs ON toString(merged_prs.number) = latest_buildkite_jobs.number
    WHERE
        tupleElement(auto_merge, 'merge_method') = ''
        AND job_state = 'failed'
    GROUP BY
        bucket
),
-- Merged PRs with a non-empty auto_merge.merge_method
auto_merged_prs AS (
    SELECT
        bucket,
        count(number) AS auto_merged_count
    FROM
        merged_prs
    WHERE
        tupleElement(auto_merge, 'merge_method') != ''
    GROUP BY
        bucket
),
-- One row per bucket that has at least one PR, with every count side by side
results AS (
    SELECT
        total_prs.bucket AS granularity_bucket,
        total_count,
        open_count,
        abandon_count,
        auto_merged_count,
        manual_merged_count,
        manual_merged_with_failures_count
    FROM
        total_prs
        LEFT JOIN open_prs ON total_prs.bucket = open_prs.bucket
        LEFT JOIN abandon_prs ON total_prs.bucket = abandon_prs.bucket
        LEFT JOIN auto_merged_prs ON total_prs.bucket = auto_merged_prs.bucket
        LEFT JOIN manual_merged_prs ON total_prs.bucket = manual_merged_prs.bucket
        LEFT JOIN manual_merged_prs_with_failures ON total_prs.bucket = manual_merged_prs_with_failures.bucket
)
SELECT
    *
FROM
    results
ORDER BY
    granularity_bucket ASC

torchci/components/layout/NavBar.tsx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,10 @@ function NavBar() {
220220
),
221221
href: "/flambeau",
222222
},
223+
{
224+
name: "vLLM CI metrics",
225+
href: "/metrics/vllm",
226+
},
223227
];
224228

225229
return (

torchci/pages/metrics/vllm.tsx

Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
import { Grid, Paper, Skeleton, Stack, Typography } from "@mui/material";
2+
import { ScalarPanelWithValue } from "components/metrics/panels/ScalarPanel";
3+
import dayjs from "dayjs";
4+
import { EChartsOption } from "echarts";
5+
import ReactECharts from "echarts-for-react";
6+
import { useDarkMode } from "lib/DarkModeContext";
7+
import { useClickHouseAPIImmutable } from "lib/GeneralUtils";
8+
import _ from "lodash";
9+
import { useState } from "react";
10+
import { TimeRangePicker } from "../metrics";
11+
12+
// Height of the chart row; passed as the `height` prop of the Grid item that hosts MergesPanel.
const ROW_HEIGHT = 375;
13+
14+
/** Reads a count field from a chart row, treating a missing value as 0. */
function toCount(value: unknown): number {
  return Number(value ?? 0);
}

/**
 * Formats `count / total` as a percentage with one decimal place.
 * Returns "0.0%" when `total` is 0 instead of producing "NaN%".
 */
function formatShare(count: number, total: number): string {
  const ratio = total === 0 ? 0 : count / total;
  return (ratio * 100).toFixed(1) + "%";
}

/**
 * Stacked bar chart of merged pull requests per `granularity_bucket`.
 *
 * Each row of `data` is expected to carry `granularity_bucket`,
 * `auto_merged_count`, `manual_merged_count` and
 * `manual_merged_with_failures_count`; the three counts are stacked in that
 * order and colored green, orange and red respectively.
 *
 * @param data rows used as the ECharts dataset source
 */
function MergesPanel({ data }: { data: any }) {
  // Use the dark mode context to determine whether to use the dark theme
  const { darkMode } = useDarkMode();

  const options: EChartsOption = {
    title: {
      text: "Merged pull requests, by day",
      subtext: "",
    },
    grid: { top: 60, right: 8, bottom: 24, left: 36 },
    dataset: { source: data },
    xAxis: { type: "category" },
    yAxis: {
      type: "value",
    },
    series: [
      {
        type: "bar",
        stack: "all",
        encode: {
          x: "granularity_bucket",
          y: "auto_merged_count",
        },
      },
      {
        type: "bar",
        stack: "all",
        encode: {
          x: "granularity_bucket",
          y: "manual_merged_count",
        },
      },
      {
        type: "bar",
        stack: "all",
        encode: {
          x: "granularity_bucket",
          y: "manual_merged_with_failures_count",
        },
      },
    ],
    // Same order as the series above: auto (green), manual (orange), force (red)
    color: ["#3ba272", "#fc9403", "#ee6666"],
    tooltip: {
      trigger: "axis",
      formatter: (params: any) => {
        // Every series shares the same dataset row, so the first entry is
        // enough; fall back to an empty row if nothing is hovered.
        const row = params?.[0]?.data ?? {};
        const manualMergedFailures = toCount(
          row.manual_merged_with_failures_count
        );
        const manualMerged = toCount(row.manual_merged_count);
        const autoMerged = toCount(row.auto_merged_count);
        const total = manualMergedFailures + manualMerged + autoMerged;

        // formatShare guards against an all-zero bucket (total === 0)
        const manualMergedFailuresPct = formatShare(
          manualMergedFailures,
          total
        );
        const manualMergedPct = formatShare(manualMerged, total);
        const autoMergedPct = formatShare(autoMerged, total);
        return `Force merges (red): ${manualMergedFailures} (${manualMergedFailuresPct})
          <br/>
          Manual merges (orange): ${manualMerged} (${manualMergedPct})
          <br/>
          Auto merges (green): ${autoMerged} (${autoMergedPct})
          <br/>
          Total: ${total}`;
      },
    },
  };

  return (
    <Paper sx={{ p: 2, height: "100%" }} elevation={3}>
      <ReactECharts
        theme={darkMode ? "dark-hud" : undefined}
        style={{ height: "100%", width: "100%" }}
        option={options}
      />
    </Paper>
  );
}
90+
91+
/**
 * vLLM CI metrics page.
 *
 * Queries `vllm/merges_percentage` for `vllm-project/vllm` at day granularity
 * over the selected time range (the last week by default), then renders the
 * merges chart next to two scalar KPIs: the share of force merges (manual
 * merges with failures) and the share of manual merges.
 */
export default function Page() {
  const [startTime, setStartTime] = useState(dayjs().subtract(1, "week"));
  const [stopTime, setStopTime] = useState(dayjs());
  const [timeRange, setTimeRange] = useState<number>(7);

  const timeParams = {
    startTime: startTime.utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
    stopTime: stopTime.utc().format("YYYY-MM-DDTHH:mm:ss.SSS"),
  };

  const { data } = useClickHouseAPIImmutable("vllm/merges_percentage", {
    ...timeParams,
    granularity: "day",
    repo: "vllm-project/vllm",
  });

  // Nothing to aggregate until the query has returned
  if (data === undefined) {
    return <Skeleton variant={"rectangular"} height={"100%"} />;
  }

  // `data` is defined from here on; _.sumBy already yields 0 for an empty list
  const manualMergedFailures = _.sumBy(
    data,
    "manual_merged_with_failures_count"
  );
  const manualMerged = _.sumBy(data, "manual_merged_count");
  const autoMerged = _.sumBy(data, "auto_merged_count");
  const total = manualMergedFailures + manualMerged + autoMerged;

  // Show their percentages instead of the absolute counts
  const manualMergedFailuresPct =
    total === 0 ? 0 : manualMergedFailures / total;
  const manualMergedPct = total === 0 ? 0 : manualMerged / total;

  return (
    <div>
      <Stack direction="row" spacing={2} sx={{ mb: 2 }}>
        <Typography fontSize={"2rem"} fontWeight={"bold"}>
          vLLM CI Metrics
        </Typography>
        <TimeRangePicker
          startTime={startTime}
          setStartTime={setStartTime}
          stopTime={stopTime}
          setStopTime={setStopTime}
          timeRange={timeRange}
          setTimeRange={setTimeRange}
        />
      </Stack>

      <Grid container spacing={2}>
        <Grid size={{ xs: 12, md: 6 }} height={ROW_HEIGHT}>
          <MergesPanel data={data} />
        </Grid>

        <Grid
          container
          size={{ xs: 6, md: 3, lg: 2 }}
          justifyContent={"stretch"}
        >
          <Stack
            justifyContent={"space-between"}
            flexGrow={1}
            flexWrap="wrap"
            spacing={1}
          >
            <ScalarPanelWithValue
              title={"% force merges (with failures)"}
              value={manualMergedFailuresPct}
              valueRenderer={(value) => (value * 100).toFixed(1) + "%"}
              badThreshold={(value) => value > 0.2}
            />
            <ScalarPanelWithValue
              title={"% manual merges"}
              value={manualMergedPct}
              valueRenderer={(value) => (value * 100).toFixed(1) + "%"}
              badThreshold={(value) => value > 0.5}
            />
          </Stack>
        </Grid>
      </Grid>
    </div>
  );
}

0 commit comments

Comments
 (0)