diff --git a/CHANGELOG.md b/CHANGELOG.md index f0ca03c690..0af95c994a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ - Inspect View: Render custom tool view when viewing messages. - Inspect View: Fix cmd+click on tasks/samples to open in new tab. - Inspect View: Only stream log bytes when requested chunks are large (>50MB) +- Inspect View: Add Show Retried Logs button when inside an eval set and some logs were retried (both Tasks and Samples are now de-duplicated by default). - Bugfix: Prevent component not found error during Human Agent transition. - Bugfix: Use `builtins` module rather than `__builtins__` when parsing tool function types. diff --git a/src/inspect_ai/_view/www/dist/assets/index.js b/src/inspect_ai/_view/www/dist/assets/index.js index 80995d6fee..407d56f839 100644 --- a/src/inspect_ai/_view/www/dist/assets/index.js +++ b/src/inspect_ai/_view/www/dist/assets/index.js @@ -28658,7 +28658,8 @@ Please change the parent to to { + set2((state) => { + state.logs.showRetriedLogs = showRetriedLogs; + }); } } }; @@ -30363,6 +30369,11 @@ Please change the parent to state.appActions.setLoading); const loadLogs = reactExports.useCallback( async (logPath) => { - const exec2 = async () => { - await syncLogs(); - await syncEvalSetInfo(logPath); - }; - exec2().catch((e) => { + await Promise.all([syncEvalSetInfo(logPath), syncLogs()]).catch((e) => { log$1.error("Error loading logs", e); setLoading(false, e); }); @@ -115824,6 +115831,54 @@ categories: ${categories.join(" ")}`; }; return { setDocumentTitle }; }; + const simplifiedStatusForDeduplication = (status2) => status2 === "started" || status2 === "success" ? status2 : "_other_"; + const useLogsWithretried = () => { + const logs = useStore((state) => state.logs.logs); + const logPreviews = useStore((state) => state.logs.logPreviews); + const logsWithEvalSetRetry = reactExports.useMemo(() => { + const logsByTaskId = logs.reduce( + (acc, log2) => { + const taskId = log2.task_id; + if (taskId) { + if (!(taskId in acc)) acc[taskId] = []; + acc[taskId].push(log2); + } + return acc; + }, + {} + ); + const bestByName = {}; + for (const items of Object.values(logsByTaskId)) { + items.sort((a, b) => { + const as = simplifiedStatusForDeduplication( + logPreviews[a.name]?.status + ); + const bs = simplifiedStatusForDeduplication( + logPreviews[b.name]?.status + ); + const am = a.mtime ?? 0; + const bm = b.mtime ?? 0; + if (as === bs) return bm - am; + if (as === "started") return -1; + if (bs === "started") return 1; + if (as === "success") return -1; + if (bs === "success") return 1; + console.warn(`Unexpected status combination: ${as}, ${bs}`, a, b); + return 0; + }); + const { name: name2 } = items[0]; + bestByName[name2] = { ...items[0], retried: false }; + } + return logs.map( + (log2) => bestByName[log2.name] ?? { + ...log2, + // task_id is optional for backward compatibility, only new logs files can be skippable + retried: log2.task_id ? true : void 0 + } + ); + }, [logs, logPreviews]); + return logsWithEvalSetRetry; + }; const log = createLogger("Client-Events"); function useClientEvents() { const syncLogs = useStore((state) => state.logsActions.syncLogs); @@ -117922,7 +117977,7 @@ categories: ${categories.join(" ")}`; /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx(styles$1i.fullWidth, styles$1i.fullWidthPadded), children: /* @__PURE__ */ jsxRuntimeExports.jsx("span", { className: styles$1i.logDir, children: logDir2 }) }), /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx(styles$1i.spacer) }), /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx("text-style-label", "text-style-secondary"), children: "Version" }), - /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx(), children: "0.3.159-21-g0b8c7eb65" }), + /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx(), children: "0.3.159-42-g335a16841" }), /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx("text-style-label", "text-style-secondary"), children: "Schema" }), /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx(), children: DB_VERSION }), /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx(styles$1i.spacer) }), @@ -175801,7 +175856,7 @@ If you are trying to annotate ${containerName} with application data, use the '$ if (url) { setTimeout(() => { if (openInNewWindow) { - window.open(url, "_blank"); + window.open(`#${url}`, "_blank"); } else { navigate(url); } @@ -175818,7 +175873,7 @@ If you are trying to annotate ${containerName} with application data, use the '$ } const openInNewWindow = e.metaKey || e.ctrlKey || e.shiftKey; if (openInNewWindow) { - window.open(rowNode.data.url, "_blank"); + window.open(`#${rowNode.data.url}`, "_blank"); } else { navigate(rowNode.data.url); } @@ -175979,8 +176034,12 @@ If you are trying to annotate ${containerName} with application data, use the '$ const gridRef = reactExports.useRef(null); const [showColumnSelector, setShowColumnSelector] = reactExports.useState(false); const columnButtonRef = reactExports.useRef(null); + const showRetriedLogs = useStore((state) => state.logs.showRetriedLogs); + const setShowRetriedLogs = useStore( + (state) => state.logsActions.setShowRetriedLogs + ); const logDir2 = useStore((state) => state.logs.logDir); - const logFiles = useStore((state) => state.logs.logs); + const logFiles = useLogsWithretried(); const evalSet = useStore((state) => state.logs.evalSet); const logPreviews = useStore((state) => state.logs.logPreviews); const { filteredCount } = useLogsListing(); @@ -176010,11 +176069,12 @@ If you are trying to annotate ${containerName} with application data, use the '$ previousWatchedLogs.current = watchedLogs; } }, [watchedLogs, startPolling, stopPolling]); - const logItems = reactExports.useMemo(() => { + const [logItems, hasRetriedLogs] = reactExports.useMemo(() => { const folderItems = []; const fileItems = []; const processedFolders = /* @__PURE__ */ new Set(); const existingLogTaskIds = /* @__PURE__ */ new Set(); + let _hasRetriedLogs = false; for (const logFile of logFiles) { if (logFile.task_id) { existingLogTaskIds.add(logFile.task_id); @@ -176030,14 +176090,19 @@ If you are trying to annotate ${containerName} with application data, use the '$ decodeURIComponent(relativePath), decodeURIComponent(dirName) ); - fileItems.push({ - id: fileOrFolderName, - name: fileOrFolderName, - type: "file", - url: logsUrl(path, logDir2), - log: logFile, - logPreview: logPreviews[logFile.name] - }); + if (logFile.retried) { + _hasRetriedLogs = true; + } + if (showRetriedLogs || !logFile.retried) { + fileItems.push({ + id: fileOrFolderName, + name: fileOrFolderName, + type: "file", + url: logsUrl(path, logDir2), + log: logFile, + logPreview: logPreviews[logFile.name] + }); + } } else if (name2.startsWith(dirWithSlash)) { const relativePath = directoryRelativeUrl(name2, currentDir); const dirName = decodeURIComponent(rootName(relativePath)); @@ -176058,9 +176123,13 @@ If you are trying to annotate ${containerName} with application data, use the '$ } } const orderedItems = [...folderItems, ...fileItems]; - const collapsedLogItems = collapseLogItems(evalSet, orderedItems); - return appendPendingItems(evalSet, existingLogTaskIds, collapsedLogItems); - }, [evalSet, logFiles, currentDir, logDir2, logPreviews]); + const _logFiles = appendPendingItems( + evalSet, + existingLogTaskIds, + orderedItems + ); + return [_logFiles, _hasRetriedLogs]; + }, [evalSet, logFiles, currentDir, logDir2, logPreviews, showRetriedLogs]); const { columns, setColumnVisibility } = useLogListColumns(); const handleColumnVisibilityChange = reactExports.useCallback( (newVisibility) => { @@ -176100,10 +176169,7 @@ If you are trying to annotate ${containerName} with application data, use the '$ }; }, [logItems]); reactExports.useEffect(() => { - const exec2 = async () => { - await loadLogs(logPath); - }; - exec2(); + loadLogs(logPath); }, [loadLogs, logPath]); const handleResetFilters = () => { if (gridRef.current?.api) { @@ -176138,6 +176204,16 @@ If you are trying to annotate ${containerName} with application data, use the '$ }, "reset-filters" ), + hasRetriedLogs && /* @__PURE__ */ jsxRuntimeExports.jsx( + NavbarButton, + { + label: "Show Retried Logs", + icon: showRetriedLogs ? ApplicationIcons.toggle.on : ApplicationIcons.toggle.off, + latched: showRetriedLogs, + onClick: () => setShowRetriedLogs(!showRetriedLogs) + }, + "show-retried" + ), /* @__PURE__ */ jsxRuntimeExports.jsx( NavbarButton, { @@ -176196,62 +176272,6 @@ If you are trying to annotate ${containerName} with application data, use the '$ ] }) ] }); }; - const collapseLogItems = (evalSet, logItems) => { - if (!evalSet) { - return logItems; - } - const running = logItems.some( - (l) => l.type === "file" && l.logPreview?.status === "started" - ); - if (!running) { - return logItems; - } - const taskIdToItems = /* @__PURE__ */ new Map(); - for (const item2 of logItems) { - if (item2.type === "file" && item2.log.task_id) { - const taskId = item2.log.task_id; - if (!taskIdToItems.has(taskId)) { - taskIdToItems.set(taskId, []); - } - taskIdToItems.get(taskId).push(item2); - } else if (item2.type === "folder" || item2.type === "file") ; - } - const selectedItems = /* @__PURE__ */ new Map(); - for (const [taskId, items] of taskIdToItems) { - let bestItem = items[0]; - for (const item2 of items) { - const currentStatus = item2.logPreview?.status; - const currentMtime = item2.log.mtime ?? 0; - const bestStatus = bestItem.logPreview?.status; - const bestMtime = bestItem.log.mtime ?? 0; - if (currentStatus === "started" && bestStatus !== "started") { - bestItem = item2; - } else if (currentStatus === "success" && bestStatus === "error") { - bestItem = item2; - } else if (currentStatus === bestStatus && currentMtime > bestMtime) { - bestItem = item2; - } - } - selectedItems.set(taskId, bestItem); - } - const collapsedLogItems = []; - const processedTaskIds = /* @__PURE__ */ new Set(); - for (const item2 of logItems) { - if (item2.type === "file" && item2.log.task_id) { - const taskId = item2.log.task_id; - if (!processedTaskIds.has(taskId)) { - const selectedItem = selectedItems.get(taskId); - if (selectedItem) { - collapsedLogItems.push(selectedItem); - } - processedTaskIds.add(taskId); - } - } else { - collapsedLogItems.push(item2); - } - } - return collapsedLogItems; - }; const appendPendingItems = (evalSet, tasksWithLogFiles, collapsedLogItems) => { const pendingTasks = new Array(); for (const task of evalSet?.tasks || []) { @@ -219226,10 +219246,7 @@ Supported expressions: const flowDir = dirname(currentPath || ""); const { loadLogs } = useLogs(); reactExports.useEffect(() => { - const exec2 = async () => { - await loadLogs(flowDir); - }; - exec2(); + loadLogs(flowDir); }, [loadLogs, flowDir]); useFlowServerData(flowDir || ""); const flow = useStore((state) => state.logs.flow); @@ -219502,15 +219519,14 @@ Supported expressions: }; }; const SamplesGrid = ({ + items, samplesPath, gridRef: externalGridRef, columns }) => { - const logDetails = useStore((state) => state.logs.logDetails); const gridState = useStore((state) => state.logs.samplesListState.gridState); const setGridState = useStore((state) => state.logsActions.setGridState); const { navigateToSampleDetail } = useSamplesGridNavigation(); - const logDir2 = useStore((state) => state.logs.logDir); const setFilteredSampleCount = useStore( (state) => state.logActions.setFilteredSampleCount ); @@ -219560,55 +219576,9 @@ Supported expressions: setPreviousSamplesPath(samplesPath); } }, [samplesPath, previousSamplesPath, setPreviousSamplesPath]); - const filteredLogDetails = reactExports.useMemo(() => { - if (!samplesPath) { - return logDetails; - } - const samplesPathAbs = join(samplesPath, logDir2); - return Object.entries(logDetails).reduce( - (acc, [logFile, details]) => { - if (logFile.startsWith(samplesPathAbs)) { - acc[logFile] = details; - } - return acc; - }, - {} - ); - }, [logDetails, logDir2, samplesPath]); reactExports.useEffect(() => { gridContainerRef.current?.focus(); }, []); - const data = reactExports.useMemo(() => { - const rows = []; - let displayIndex = 1; - Object.entries(filteredLogDetails).forEach(([logFile, details]) => { - details.sampleSummaries.forEach((sample2) => { - const row2 = { - logFile, - created: details.eval.created, - task: details.eval.task || "", - model: details.eval.model || "", - status: details.status, - sampleId: sample2.id, - epoch: sample2.epoch, - input: inputString(sample2.input).join("\n"), - target: Array.isArray(sample2.target) ? sample2.target.join(", ") : sample2.target, - error: sample2.error, - limit: sample2.limit, - retries: sample2.retries, - completed: sample2.completed || false, - displayIndex: displayIndex++ - }; - if (sample2.scores) { - Object.entries(sample2.scores).forEach(([scoreName, score2]) => { - row2[`score_${scoreName}`] = score2.value; - }); - } - rows.push(row2); - }); - }); - return rows; - }, [filteredLogDetails]); const handleRowClick = reactExports.useCallback( (e) => { if (e.data && e.node && gridRef.current?.api) { @@ -219686,7 +219656,7 @@ Supported expressions: AgGridReact, { ref: gridRef, - rowData: data, + rowData: items, animateRows: false, columnDefs: columns, defaultColDef: { @@ -219738,7 +219708,7 @@ Supported expressions: selectCurrentSample(); clearSelectedSample(); }, - loading: data.length === 0 && (loading > 0 || syncing) + loading: items.length === 0 && (loading > 0 || syncing) } ) }) }); }; @@ -219769,9 +219739,16 @@ Supported expressions: const logDir2 = useStore((state) => state.logs.logDir); const loading = useStore((state) => state.app.status.loading); const syncing = useStore((state) => state.app.status.syncing); + const showRetriedLogs = useStore((state) => state.logs.showRetriedLogs); + const setShowRetriedLogs = useStore( + (state) => state.logsActions.setShowRetriedLogs + ); const filteredSamplesCount = useStore( (state) => state.log.filteredSampleCount ); + const setFilteredSampleCount = useStore( + (state) => state.logActions.setFilteredSampleCount + ); const gridRef = reactExports.useRef(null); const [showColumnSelector, setShowColumnSelector] = reactExports.useState(false); const columnButtonRef = reactExports.useRef(null); @@ -219807,17 +219784,19 @@ Supported expressions: const flowData = useStore((state) => state.logs.flow); const currentDir = join(samplesPath || "", logDir2); const evalSet = useStore((state) => state.logs.evalSet); - const logFiles = useStore((state) => state.logs.logs); + const logFiles = useLogsWithretried(); const logPreviews = useStore((state) => state.logs.logPreviews); const currentDirLogFiles = reactExports.useMemo(() => { const files = []; for (const logFile of logFiles) { - if (logFile.name.startsWith(currentDir)) { + const inCurrentDir = logFile.name.startsWith(currentDir); + const skipped = !showRetriedLogs && logFile.retried; + if (inCurrentDir && !skipped) { files.push(logFile); } } return files; - }, [currentDir, logFiles]); + }, [currentDir, logFiles, showRetriedLogs]); const totalTaskCount = reactExports.useMemo(() => { const currentDirTaskIds = new Set(currentDirLogFiles.map((f) => f.task_id)); let count = currentDirLogFiles.length; @@ -219839,11 +219818,69 @@ Supported expressions: return count; }, [logPreviews, currentDirLogFiles]); reactExports.useEffect(() => { - const exec2 = async () => { - await loadLogs(samplesPath); - }; - exec2(); + loadLogs(samplesPath); }, [loadLogs, samplesPath]); + const logDetailsInPath = reactExports.useMemo(() => { + if (!samplesPath) { + return logDetails; + } + const samplesPathAbs = join(samplesPath, logDir2); + return Object.entries(logDetails).reduce( + (acc, [logFile, details]) => { + if (logFile.startsWith(samplesPathAbs)) { + acc[logFile] = details; + } + return acc; + }, + {} + ); + }, [logDetails, logDir2, samplesPath]); + const [sampleRows, hasRetriedLogs] = reactExports.useMemo(() => { + const allRows = []; + let displayIndex = 1; + let anyLogInCurrentDirCouldBeSkipped = false; + const logInCurrentDirByName = currentDirLogFiles.reduce( + (acc, log2) => { + if (log2.retried) { + anyLogInCurrentDirCouldBeSkipped = true; + } + acc[log2.name] = log2; + return acc; + }, + {} + ); + Object.entries(logDetailsInPath).forEach(([logFile, logDetail]) => { + logDetail.sampleSummaries.forEach((sampleSummary) => { + const row2 = { + logFile, + created: logDetail.eval.created, + task: logDetail.eval.task || "", + model: logDetail.eval.model || "", + status: logDetail.status, + sampleId: sampleSummary.id, + epoch: sampleSummary.epoch, + input: inputString(sampleSummary.input).join("\n"), + target: Array.isArray(sampleSummary.target) ? sampleSummary.target.join(", ") : sampleSummary.target, + error: sampleSummary.error, + limit: sampleSummary.limit, + retries: sampleSummary.retries, + completed: sampleSummary.completed || false, + displayIndex: displayIndex++ + }; + if (sampleSummary.scores) { + Object.entries(sampleSummary.scores).forEach(([scoreName, score2]) => { + row2[`score_${scoreName}`] = score2.value; + }); + } + allRows.push(row2); + }); + }); + const _sampleRows = allRows.filter( + (row2) => row2.logFile in logInCurrentDirByName + ); + const _hasRetriedLogs = _sampleRows.length < allRows.length || anyLogInCurrentDirCouldBeSkipped; + return [_sampleRows, _hasRetriedLogs]; + }, [logDetailsInPath, currentDirLogFiles]); const filterModel = gridRef.current?.api?.getFilterModel() || {}; const filteredFields = Object.keys(filterModel); const hasFilter = filteredFields.length > 0; @@ -219858,6 +219895,25 @@ Supported expressions: }, "reset-filters" ), + hasRetriedLogs && /* @__PURE__ */ jsxRuntimeExports.jsx( + NavbarButton, + { + label: "Show Retried Logs", + icon: showRetriedLogs ? ApplicationIcons.toggle.on : ApplicationIcons.toggle.off, + latched: showRetriedLogs, + onClick: () => { + setShowRetriedLogs(!showRetriedLogs); + setTimeout(() => { + if (gridRef.current) { + setFilteredSampleCount( + gridRef.current.api.getDisplayedRowCount() + ); + } + }, 10); + } + }, + "show-retried" + ), /* @__PURE__ */ jsxRuntimeExports.jsx( NavbarButton, { @@ -219889,6 +219945,7 @@ Supported expressions: /* @__PURE__ */ jsxRuntimeExports.jsx("div", { className: clsx(styles$1.list, "text-size-smaller"), children: /* @__PURE__ */ jsxRuntimeExports.jsx( SamplesGrid, { + items: sampleRows, samplesPath, gridRef, columns diff --git a/src/inspect_ai/_view/www/src/app/appearance/icons.ts b/src/inspect_ai/_view/www/src/app/appearance/icons.ts index 16c5718698..1073bd21ac 100644 --- a/src/inspect_ai/_view/www/src/app/appearance/icons.ts +++ b/src/inspect_ai/_view/www/src/app/appearance/icons.ts @@ -144,6 +144,11 @@ export const ApplicationIcons = { step: "bi bi-fast-forward-btn", subtask: "bi bi-subtract", success: "bi bi-check-circle-fill", + toggle: { + // combination of toggle-on and toggle2-off looked best for our default button font size + on: "bi bi-toggle-on", + off: "bi bi-toggle2-off", + }, transcript: "bi bi-list-columns-reverse", tree: { open: "bi bi-caret-down-fill", diff --git a/src/inspect_ai/_view/www/src/app/flow/FlowPanel.tsx b/src/inspect_ai/_view/www/src/app/flow/FlowPanel.tsx index ad3690329d..127a1e6c94 100644 --- a/src/inspect_ai/_view/www/src/app/flow/FlowPanel.tsx +++ b/src/inspect_ai/_view/www/src/app/flow/FlowPanel.tsx @@ -20,10 +20,7 @@ export const FlowPanel: FC = () => { // Get the logs from the store const { loadLogs } = useLogs(); useEffect(() => { - const exec = async () => { - await loadLogs(flowDir); - }; - exec(); + loadLogs(flowDir); }, [loadLogs, flowDir]); // Retrieve flow data diff --git a/src/inspect_ai/_view/www/src/app/log-list/LogsPanel.tsx b/src/inspect_ai/_view/www/src/app/log-list/LogsPanel.tsx index 26f2765ed0..57f96b3d90 100644 --- a/src/inspect_ai/_view/www/src/app/log-list/LogsPanel.tsx +++ b/src/inspect_ai/_view/www/src/app/log-list/LogsPanel.tsx @@ -6,7 +6,12 @@ import { useNavigate } from "react-router-dom"; import { EvalSet } from "../../@types/log"; import { ProgressBar } from "../../components/ProgressBar"; import { useClientEvents } from "../../state/clientEvents"; -import { useDocumentTitle, useLogs, useLogsListing } from "../../state/hooks"; +import { + useDocumentTitle, + useLogs, + useLogsListing, + useLogsWithretried, +} from "../../state/hooks"; import { useStore } from "../../state/store"; import { dirname, isInDirectory } from "../../utils/path"; import { directoryRelativeUrl, join } from "../../utils/uri"; @@ -43,8 +48,12 @@ export const LogsPanel: FC = ({ maybeShowSingleLog }) => { const [showColumnSelector, setShowColumnSelector] = useState(false); const columnButtonRef = useRef(null); + const showRetriedLogs = useStore((state) => state.logs.showRetriedLogs); + const setShowRetriedLogs = useStore( + (state) => state.logsActions.setShowRetriedLogs, + ); const logDir = useStore((state) => state.logs.logDir); - const logFiles = useStore((state) => state.logs.logs); + const logFiles = useLogsWithretried(); const evalSet = useStore((state) => state.logs.evalSet); const logPreviews = useStore((state) => state.logs.logPreviews); const { filteredCount } = useLogsListing(); @@ -98,42 +107,49 @@ export const LogsPanel: FC = ({ maybeShowSingleLog }) => { } }, [watchedLogs, startPolling, stopPolling]); - const logItems: Array = - useMemo(() => { - const folderItems: Array = - []; - const fileItems: Array = - []; + const [logItems, hasRetriedLogs]: [ + Array, + boolean, + ] = useMemo(() => { + const folderItems: Array = + []; + const fileItems: Array = []; + + // Track processed folders to avoid duplicates + const processedFolders = new Set(); + const existingLogTaskIds = new Set(); + let _hasRetriedLogs = false; + + for (const logFile of logFiles) { + if (logFile.task_id) { + existingLogTaskIds.add(logFile.task_id); + } - // Track processed folders to avoid duplicates - const processedFolders = new Set(); - const existingLogTaskIds = new Set(); + const name = logFile.name; - for (const logFile of logFiles) { - if (logFile.task_id) { - existingLogTaskIds.add(logFile.task_id); - } + const cleanDir = currentDir.endsWith("/") + ? currentDir.slice(0, -1) + : currentDir; - const name = logFile.name; + const dirWithSlash = !currentDir.endsWith("/") + ? currentDir + "/" + : currentDir; - const cleanDir = currentDir.endsWith("/") - ? currentDir.slice(0, -1) - : currentDir; + if (isInDirectory(name, cleanDir)) { + const dirName = directoryRelativeUrl(currentDir, logDir); + const relativePath = directoryRelativeUrl(name, currentDir); - const dirWithSlash = !currentDir.endsWith("/") - ? currentDir + "/" - : currentDir; + const fileOrFolderName = decodeURIComponent(rootName(relativePath)); + const path = join( + decodeURIComponent(relativePath), + decodeURIComponent(dirName), + ); - if (isInDirectory(name, cleanDir)) { - const dirName = directoryRelativeUrl(currentDir, logDir); - const relativePath = directoryRelativeUrl(name, currentDir); - - const fileOrFolderName = decodeURIComponent(rootName(relativePath)); - const path = join( - decodeURIComponent(relativePath), - decodeURIComponent(dirName), - ); + if (logFile.retried) { + _hasRetriedLogs = true; + } + if (showRetriedLogs || !logFile.retried) { fileItems.push({ id: fileOrFolderName, name: fileOrFolderName, @@ -142,39 +158,39 @@ export const LogsPanel: FC = ({ maybeShowSingleLog }) => { log: logFile, logPreview: logPreviews[logFile.name], }); - } else if (name.startsWith(dirWithSlash)) { - // This is file that is next level (or deeper) child of the current directory - const relativePath = directoryRelativeUrl(name, currentDir); - - const dirName = decodeURIComponent(rootName(relativePath)); - const currentDirRelative = directoryRelativeUrl(currentDir, logDir); - const url = join(dirName, decodeURIComponent(currentDirRelative)); - if (!processedFolders.has(dirName)) { - folderItems.push({ - id: dirName, - name: dirName, - type: "folder", - url: logsUrl(url, logDir), - itemCount: logFiles.filter((file) => - file.name.startsWith(dirname(name)), - ).length, - }); - processedFolders.add(dirName); - } + } + } else if (name.startsWith(dirWithSlash)) { + // This is file that is next level (or deeper) child of the current directory + const relativePath = directoryRelativeUrl(name, currentDir); + + const dirName = decodeURIComponent(rootName(relativePath)); + const currentDirRelative = directoryRelativeUrl(currentDir, logDir); + const url = join(dirName, decodeURIComponent(currentDirRelative)); + if (!processedFolders.has(dirName)) { + folderItems.push({ + id: dirName, + name: dirName, + type: "folder", + url: logsUrl(url, logDir), + itemCount: logFiles.filter((file) => + file.name.startsWith(dirname(name)), + ).length, + }); + processedFolders.add(dirName); } } + } - const orderedItems = [...folderItems, ...fileItems]; + const orderedItems = [...folderItems, ...fileItems]; - // Ensure there is only one entry for each task id, preferring to - // always show running or complete tasks (over error tasks). Ensure that the - // order of all items isn't changed - const collapsedLogItems: Array< - FileLogItem | FolderLogItem | PendingTaskItem - > = collapseLogItems(evalSet, orderedItems); + const _logFiles = appendPendingItems( + evalSet, + existingLogTaskIds, + orderedItems, + ); - return appendPendingItems(evalSet, existingLogTaskIds, collapsedLogItems); - }, [evalSet, logFiles, currentDir, logDir, logPreviews]); + return [_logFiles, _hasRetriedLogs]; + }, [evalSet, logFiles, currentDir, logDir, logPreviews, showRetriedLogs]); const { columns, setColumnVisibility } = useLogListColumns(); @@ -225,10 +241,7 @@ export const LogsPanel: FC = ({ maybeShowSingleLog }) => { }, [logItems]); useEffect(() => { - const exec = async () => { - await loadLogs(logPath); - }; - exec(); + loadLogs(logPath); }, [loadLogs, logPath]); const handleResetFilters = () => { @@ -266,6 +279,20 @@ export const LogsPanel: FC = ({ maybeShowSingleLog }) => { /> )} + {hasRetriedLogs && ( + setShowRetriedLogs(!showRetriedLogs)} + /> + )} + = ({ maybeShowSingleLog }) => { ); }; -export const collapseLogItems = ( - evalSet: EvalSet | undefined, - logItems: (FileLogItem | FolderLogItem | PendingTaskItem)[], -): (FileLogItem | FolderLogItem | PendingTaskItem)[] => { - if (!evalSet) { - return logItems; - } - - const running = logItems.some( - (l) => l.type === "file" && l.logPreview?.status === "started", - ); - if (!running) { - return logItems; - } - - // Group file items by task_id - const taskIdToItems = new Map(); - const itemsWithoutTaskId: Array = []; - - for (const item of logItems) { - if (item.type === "file" && item.log.task_id) { - const taskId = item.log.task_id; - if (!taskIdToItems.has(taskId)) { - taskIdToItems.set(taskId, []); - } - taskIdToItems.get(taskId)!.push(item); - } else if (item.type === "folder" || item.type === "file") { - itemsWithoutTaskId.push(item); - } - } - - // For each task_id, select the best item (prefer running/complete over error) - const selectedItems = new Map(); - for (const [taskId, items] of taskIdToItems) { - // Sort by status priority: started > success > error - // If same priority, take the last one - let bestItem = items[0]; - for (const item of items) { - const currentStatus = item.logPreview?.status; - const currentMtime = item.log.mtime ?? 0; - const bestStatus = bestItem.logPreview?.status; - const bestMtime = bestItem.log.mtime ?? 0; - - // Prefer started over everything - if (currentStatus === "started" && bestStatus !== "started") { - bestItem = item; - } - // Prefer success over error - else if (currentStatus === "success" && bestStatus === "error") { - bestItem = item; - } - // If same status or current is error, prefer most recent - else if (currentStatus === bestStatus && currentMtime > bestMtime) { - bestItem = item; - } - } - selectedItems.set(taskId, bestItem); - } - - // Rebuild logItems maintaining order, replacing duplicates with selected item - const collapsedLogItems: Array< - FileLogItem | FolderLogItem | PendingTaskItem - > = []; - const processedTaskIds = new Set(); - - for (const item of logItems) { - if (item.type === "file" && item.log.task_id) { - const taskId = item.log.task_id; - if (!processedTaskIds.has(taskId)) { - const selectedItem = selectedItems.get(taskId); - if (selectedItem) { - collapsedLogItems.push(selectedItem); - } - processedTaskIds.add(taskId); - } - } else { - // Include folders and files without task_id - collapsedLogItems.push(item); - } - } - return collapsedLogItems; -}; - const appendPendingItems = ( evalSet: EvalSet | undefined, tasksWithLogFiles: Set, diff --git a/src/inspect_ai/_view/www/src/app/samples-panel/SamplesPanel.tsx b/src/inspect_ai/_view/www/src/app/samples-panel/SamplesPanel.tsx index 7f855355a7..554fbc7fb4 100644 --- a/src/inspect_ai/_view/www/src/app/samples-panel/SamplesPanel.tsx +++ b/src/inspect_ai/_view/www/src/app/samples-panel/SamplesPanel.tsx @@ -4,7 +4,11 @@ import { AgGridReact } from "ag-grid-react"; import { FC, useCallback, useEffect, useMemo, useRef, useState } from "react"; import { ActivityBar } from "../../components/ActivityBar"; import { ProgressBar } from "../../components/ProgressBar"; -import { useLogs } from "../../state/hooks"; +import { + LogHandleWithretried, + useLogs, + useLogsWithretried, +} from "../../state/hooks"; import { useStore } from "../../state/store"; import { join } from "../../utils/uri"; import { ApplicationIcons } from "../appearance/icons"; @@ -19,6 +23,8 @@ import { ColumnSelectorPopover } from "../shared/ColumnSelectorPopover"; import { useSampleColumns } from "./samples-grid/hooks"; import { SamplesGrid } from "./samples-grid/SamplesGrid"; import styles from "./SamplesPanel.module.css"; +import { SampleRow } from "./samples-grid/types"; +import { inputString } from "../../utils/format"; export const SamplesPanel: FC = () => { const { samplesPath } = useSamplesRouteParams(); @@ -27,10 +33,17 @@ export const SamplesPanel: FC = () => { const loading = useStore((state) => state.app.status.loading); const syncing = useStore((state) => state.app.status.syncing); + const showRetriedLogs = useStore((state) => state.logs.showRetriedLogs); + const setShowRetriedLogs = useStore( + (state) => state.logsActions.setShowRetriedLogs, + ); const filteredSamplesCount = useStore( (state) => state.log.filteredSampleCount, ); + const setFilteredSampleCount = useStore( + (state) => state.logActions.setFilteredSampleCount, + ); const gridRef = useRef(null); const [showColumnSelector, setShowColumnSelector] = useState(false); @@ -80,18 +93,20 @@ export const SamplesPanel: FC = () => { const currentDir = join(samplesPath || "", logDir); const evalSet = useStore((state) => state.logs.evalSet); - const logFiles = useStore((state) => state.logs.logs); + const logFiles = useLogsWithretried(); const logPreviews = useStore((state) => state.logs.logPreviews); const currentDirLogFiles = useMemo(() => { const files = []; for (const logFile of logFiles) { - if (logFile.name.startsWith(currentDir)) { + const inCurrentDir = logFile.name.startsWith(currentDir); + const skipped = !showRetriedLogs && logFile.retried; + if (inCurrentDir && !skipped) { files.push(logFile); } } return files; - }, [currentDir, logFiles]); + }, [currentDir, logFiles, showRetriedLogs]); const totalTaskCount = useMemo(() => { const currentDirTaskIds = new Set(currentDirLogFiles.map((f) => f.task_id)); @@ -116,12 +131,87 @@ export const SamplesPanel: FC = () => { }, [logPreviews, currentDirLogFiles]); useEffect(() => { - const exec = async () => { - await loadLogs(samplesPath); - }; - exec(); + loadLogs(samplesPath); }, [loadLogs, samplesPath]); + // Filter logDetails based on samplesPath + const logDetailsInPath = useMemo(() => { + if (!samplesPath) { + return logDetails; // Show all samples when no path is specified + } + + const samplesPathAbs = join(samplesPath, logDir); + + return Object.entries(logDetails).reduce( + (acc, [logFile, details]) => { + // Check if the logFile starts with the samplesPath + if (logFile.startsWith(samplesPathAbs)) { + acc[logFile] = details; + } + return acc; + }, + {} as typeof logDetails, + ); + }, [logDetails, logDir, samplesPath]); + + // Transform logDetails into flat rows + const [sampleRows, hasRetriedLogs] = useMemo(() => { + const allRows: SampleRow[] = []; + let displayIndex = 1; + + let anyLogInCurrentDirCouldBeSkipped = false; + const logInCurrentDirByName = currentDirLogFiles.reduce( + (acc: Record, log) => { + if (log.retried) { + anyLogInCurrentDirCouldBeSkipped = true; + } + acc[log.name] = log; + return acc; + }, + {}, + ); + + Object.entries(logDetailsInPath).forEach(([logFile, logDetail]) => { + logDetail.sampleSummaries.forEach((sampleSummary) => { + const row: SampleRow = { + logFile, + created: logDetail.eval.created, + task: logDetail.eval.task || "", + model: logDetail.eval.model || "", + status: logDetail.status, + sampleId: sampleSummary.id, + epoch: sampleSummary.epoch, + input: inputString(sampleSummary.input).join("\n"), + target: Array.isArray(sampleSummary.target) + ? sampleSummary.target.join(", ") + : sampleSummary.target, + error: sampleSummary.error, + limit: sampleSummary.limit, + retries: sampleSummary.retries, + completed: sampleSummary.completed || false, + displayIndex: displayIndex++, + }; + + // Add scores as individual fields + if (sampleSummary.scores) { + Object.entries(sampleSummary.scores).forEach(([scoreName, score]) => { + row[`score_${scoreName}`] = score.value; + }); + } + + allRows.push(row); + }); + }); + + const _sampleRows = allRows.filter( + (row) => row.logFile in logInCurrentDirByName, + ); + const _hasRetriedLogs = + _sampleRows.length < allRows.length || anyLogInCurrentDirCouldBeSkipped; + + return [_sampleRows, _hasRetriedLogs]; + }, [logDetailsInPath, currentDirLogFiles]); + const filterModel = gridRef.current?.api?.getFilterModel() || {}; const filteredFields = Object.keys(filterModel); const hasFilter = filteredFields.length > 0; @@ -138,6 +228,29 @@ export const SamplesPanel: FC = () => { /> )} + {hasRetriedLogs && ( + { + setShowRetriedLogs(!showRetriedLogs); + // update number of samples displayed in lower right corner when toggling + setTimeout(() => { + if (gridRef.current) { + setFilteredSampleCount( + gridRef.current.api.getDisplayedRowCount(), + ); + } + }, 10); + }} + /> + )} {
| null>; columns: ColDef[]; @@ -30,15 +29,14 @@ interface SamplesGridProps { // Sample Grid export const SamplesGrid: FC = ({ + items, samplesPath, gridRef: externalGridRef, columns, }) => { - const logDetails = useStore((state) => state.logs.logDetails); const gridState = useStore((state) => state.logs.samplesListState.gridState); const setGridState = useStore((state) => state.logsActions.setGridState); const { navigateToSampleDetail } = useSamplesGridNavigation(); - const logDir = useStore((state) => state.logs.logDir); const setFilteredSampleCount = useStore( (state) => state.logActions.setFilteredSampleCount, ); @@ -102,70 +100,10 @@ export const SamplesGrid: FC = ({ } }, [samplesPath, previousSamplesPath, setPreviousSamplesPath]); - // Filter logDetails based on samplesPath - const filteredLogDetails = useMemo(() => { - if (!samplesPath) { - return logDetails; // Show all samples when no path is specified - } - - const samplesPathAbs = join(samplesPath, logDir); - - return Object.entries(logDetails).reduce( - (acc, [logFile, details]) => { - // Check if the logFile starts with the samplesPath - if (logFile.startsWith(samplesPathAbs)) { - acc[logFile] = details; - } - return acc; - }, - {} as typeof logDetails, - ); - }, [logDetails, logDir, samplesPath]); - useEffect(() => { gridContainerRef.current?.focus(); }, []); - // Transform logDetails into flat rows - const data = useMemo(() => { - const rows: SampleRow[] = []; - let displayIndex = 1; - - Object.entries(filteredLogDetails).forEach(([logFile, details]) => { - details.sampleSummaries.forEach((sample) => { - const row: SampleRow = { - logFile, - created: details.eval.created, - task: details.eval.task || "", - model: details.eval.model || "", - status: details.status, - sampleId: sample.id, - epoch: sample.epoch, - input: inputString(sample.input).join("\n"), - target: Array.isArray(sample.target) - ? sample.target.join(", ") - : sample.target, - error: sample.error, - limit: sample.limit, - retries: sample.retries, - completed: sample.completed || false, - displayIndex: displayIndex++, - }; - - // Add scores as individual fields - if (sample.scores) { - Object.entries(sample.scores).forEach(([scoreName, score]) => { - row[`score_${scoreName}`] = score.value; - }); - } - - rows.push(row); - }); - }); - - return rows; - }, [filteredLogDetails]); - const handleRowClick = useCallback( (e: RowClickedEvent) => { if (e.data && e.node && gridRef.current?.api) { @@ -278,7 +216,7 @@ export const SamplesGrid: FC = ({
ref={gridRef} - rowData={data} + rowData={items} animateRows={false} columnDefs={columns} defaultColDef={{ @@ -333,7 +271,7 @@ export const SamplesGrid: FC = ({ selectCurrentSample(); clearSelectedSample(); }} - loading={data.length === 0 && (loading > 0 || syncing)} + loading={items.length === 0 && (loading > 0 || syncing)} />
diff --git a/src/inspect_ai/_view/www/src/app/samples-panel/samples-grid/types.ts b/src/inspect_ai/_view/www/src/app/samples-panel/samples-grid/types.ts index 8eec57be18..63f9e6034d 100644 --- a/src/inspect_ai/_view/www/src/app/samples-panel/samples-grid/types.ts +++ b/src/inspect_ai/_view/www/src/app/samples-panel/samples-grid/types.ts @@ -4,6 +4,7 @@ import { Status } from "../../../@types/log"; export interface SampleRow { displayIndex?: number; logFile: string; + created: string; // representing datetime task: string; model: string; status?: Status; diff --git a/src/inspect_ai/_view/www/src/app/types.ts b/src/inspect_ai/_view/www/src/app/types.ts index 43a67a10cf..df2fad0a8a 100644 --- a/src/inspect_ai/_view/www/src/app/types.ts +++ b/src/inspect_ai/_view/www/src/app/types.ts @@ -89,6 +89,7 @@ export interface LogsState { }; flow?: string; flowDir?: string; + showRetriedLogs: boolean; } export interface LogsListing { diff --git a/src/inspect_ai/_view/www/src/state/hooks.ts b/src/inspect_ai/_view/www/src/state/hooks.ts index 1496ee3a44..2e93bc1553 100644 --- a/src/inspect_ai/_view/www/src/state/hooks.ts +++ b/src/inspect_ai/_view/www/src/state/hooks.ts @@ -1,6 +1,6 @@ import { highlightElement } from "prismjs"; import { RefObject, useCallback, useEffect, useMemo, useRef } from "react"; -import { EvalSample, EvalSpec, Events } from "../@types/log"; +import { EvalSample, EvalSpec, Events, Status } from "../@types/log"; import { createEvalDescriptor, createSamplesDescriptor, @@ -584,27 +584,17 @@ export const useSamplePopover = (id: string) => { }; export const useLogs = () => { - // Loading logs + // Loading logs and eval set info const syncLogs = useStore((state) => state.logsActions.syncLogs); - - // Loading eval set info const syncEvalSetInfo = useStore( (state) => state.logsActions.syncEvalSetInfo, ); - - // Status const setLoading = useStore((state) => state.appActions.setLoading); const loadLogs = useCallback( async (logPath?: string) => { - const exec = async () => { - // Sync logs - await syncLogs(); - - // Sync eval set info - await syncEvalSetInfo(logPath); - }; - exec().catch((e) => { + // load in parallel to display Show Retried Logs button as soon as we know current directory is an eval set without awaiting all logs + await Promise.all([syncEvalSetInfo(logPath), syncLogs()]).catch((e) => { log.error("Error loading logs", e); setLoading(false, e as Error); }); @@ -699,3 +689,65 @@ export const useDocumentTitle = () => { }; return { setDocumentTitle }; }; + +const simplifiedStatusForDeduplication = (status: Status | undefined) => + status === "started" || status === "success" ? status : "_other_"; + +export type LogHandleWithretried = LogHandle & { retried?: boolean }; +export const useLogsWithretried = (): LogHandleWithretried[] => { + const logs = useStore((state) => state.logs.logs); + const logPreviews = useStore((state) => state.logs.logPreviews); + + const logsWithEvalSetRetry = useMemo(() => { + const logsByTaskId = logs.reduce( + (acc: Record, log) => { + const taskId = log.task_id; + if (taskId) { + if (!(taskId in acc)) acc[taskId] = []; + acc[taskId].push(log); + } + return acc; + }, + {}, + ); + // For each task_id, select the best item (prefer running/complete over error) + // Sort by status priority: started > success > error, cancelled, or missing if logPreview is not loaded + // If same priority, take the latest one + const bestByName: Record = {}; + for (const items of Object.values(logsByTaskId)) { + items.sort((a, b) => { + const as = simplifiedStatusForDeduplication( + logPreviews[a.name]?.status, + ); + const bs = simplifiedStatusForDeduplication( + logPreviews[b.name]?.status, + ); + const am = a.mtime ?? 0; + const bm = b.mtime ?? 0; + + if (as === bs) return bm - am; // newest on top + if (as === "started") return -1; + if (bs === "started") return 1; + if (as === "success") return -1; + if (bs === "success") return 1; + + console.warn(`Unexpected status combination: ${as}, ${bs}`, a, b); + return 0; + }); + const { name } = items[0]; + bestByName[name] = { ...items[0], retried: false }; + } + + // Rebuild logs maintaining order, marking duplicates as skippable + return logs.map( + (log) => + bestByName[log.name] ?? { + ...log, + // task_id is optional for backward compatibility, only new logs files can be skippable + retried: log.task_id ? true : undefined, + }, + ); + }, [logs, logPreviews]); + + return logsWithEvalSetRetry; +}; diff --git a/src/inspect_ai/_view/www/src/state/logsSlice.ts b/src/inspect_ai/_view/www/src/state/logsSlice.ts index 10f7aecebd..f10def3e18 100644 --- a/src/inspect_ai/_view/www/src/state/logsSlice.ts +++ b/src/inspect_ai/_view/www/src/state/logsSlice.ts @@ -63,6 +63,7 @@ export interface LogsSlice { clearDisplayedSamples: () => void; setSamplesColumnVisibility: (visibility: Record) => void; setPreviousSamplesPath: (path: string | undefined) => void; + setShowRetriedLogs: (showRetriedLogs: boolean) => void; }; } @@ -84,6 +85,7 @@ const initialState: LogsState = { samplesListState: { columnVisibility: {}, }, + showRetriedLogs: false, }; export const createLogsSlice = ( @@ -459,6 +461,11 @@ export const createLogsSlice = ( return []; } }, + setShowRetriedLogs: (showRetriedLogs: boolean) => { + set((state) => { + state.logs.showRetriedLogs = showRetriedLogs; + }); + }, }, } as const;