Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/web-evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"tailwind-merge": "^3.3.0",
"tailwindcss-animate": "^1.0.7",
"vaul": "^1.1.2",
"usehooks-ts": "^3.1.0",
"zod": "^3.25.61"
},
"devDependencies": {
Expand Down
19 changes: 7 additions & 12 deletions apps/web-evals/src/app/runs/[id]/run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@
import { useMemo, useState, useCallback, useEffect, Fragment } from "react"
import { toast } from "sonner"
import { LoaderCircle, FileText, Copy, Check, StopCircle, List, Layers } from "lucide-react"
import { useLocalStorage } from "usehooks-ts"

import type { Run, TaskMetrics as _TaskMetrics, Task } from "@roo-code/evals"
import type { ToolName } from "@roo-code/types"

import { formatCurrency, formatDuration, formatTokens, formatToolUsageSuccessRate } from "@/lib/formatters"
import { deserializeBoolean } from "@/lib/storage"
import { useRunStatus } from "@/hooks/use-run-status"
import { killRun } from "@/actions/runs"
import {
Expand Down Expand Up @@ -253,19 +255,12 @@ export function Run({ run }: { run: Run }) {
const [copied, setCopied] = useState(false)
const [showKillDialog, setShowKillDialog] = useState(false)
const [isKilling, setIsKilling] = useState(false)
const [groupByStatus, setGroupByStatus] = useState(() => {
// Initialize from localStorage if available (client-side only)
if (typeof window !== "undefined") {
const stored = localStorage.getItem("evals-group-by-status")
return stored === "true"
}
return false
})

// Persist groupByStatus to localStorage
useEffect(() => {
localStorage.setItem("evals-group-by-status", String(groupByStatus))
}, [groupByStatus])
const [groupByStatus, setGroupByStatus] = useLocalStorage<boolean>("evals-group-by-status", false, {
serializer: (value: boolean) => String(value),
deserializer: deserializeBoolean,
initializeWithValue: false,
})

// Determine if run is still active (has heartbeat or runners)
const isRunActive = !run.taskMetricsId && (!!heartbeat || (runners && runners.length > 0))
Expand Down
129 changes: 66 additions & 63 deletions apps/web-evals/src/app/runs/new/new-run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { useQuery } from "@tanstack/react-query"
import { useForm, FormProvider } from "react-hook-form"
import { zodResolver } from "@hookform/resolvers/zod"
import { toast } from "sonner"
import { useLocalStorage } from "usehooks-ts"
import {
X,
Rocket,
Expand Down Expand Up @@ -47,6 +48,7 @@ import {
ITERATIONS_DEFAULT,
} from "@/lib/schemas"
import { cn } from "@/lib/utils"
import { deserializeEnum, deserializeNumber, deserializeStringArray } from "@/lib/storage"

import { loadRooLastModelSelection, saveRooLastModelSelection } from "@/lib/roo-last-model-selection"
import { normalizeCreateRunForSubmit } from "@/lib/normalize-create-run"
Expand Down Expand Up @@ -104,6 +106,8 @@ type ConfigSelection = {
popoverOpen: boolean
}

const SUITE_VALUES: ReadonlySet<"full" | "partial"> = new Set(["full", "partial"])

export function NewRun() {
const router = useRouter()
const modelSelectionsByProviderRef = useRef<Record<string, ModelSelection[]>>({})
Expand All @@ -115,6 +119,40 @@ export function NewRun() {
const [commandExecutionTimeout, setCommandExecutionTimeout] = useState(20)
const [terminalShellIntegrationTimeout, setTerminalShellIntegrationTimeout] = useState(30) // seconds

const [savedConcurrency, setSavedConcurrency] = useLocalStorage<number>("evals-concurrency", CONCURRENCY_DEFAULT, {
serializer: (value: number) => String(value),
deserializer: (raw: string) => deserializeNumber(raw) ?? CONCURRENCY_DEFAULT,
initializeWithValue: false,
})
const [savedTimeout, setSavedTimeout] = useLocalStorage<number>("evals-timeout", TIMEOUT_DEFAULT, {
serializer: (value: number) => String(value),
deserializer: (raw: string) => deserializeNumber(raw) ?? TIMEOUT_DEFAULT,
initializeWithValue: false,
})
const [savedCommandTimeout, setSavedCommandTimeout] = useLocalStorage<number>(
"evals-command-execution-timeout",
20,
{
serializer: (value: number) => String(value),
deserializer: (raw: string) => deserializeNumber(raw) ?? 20,
initializeWithValue: false,
},
)
const [savedShellTimeout, setSavedShellTimeout] = useLocalStorage<number>("evals-shell-integration-timeout", 30, {
serializer: (value: number) => String(value),
deserializer: (raw: string) => deserializeNumber(raw) ?? 30,
initializeWithValue: false,
})
const [savedSuite, setSavedSuite] = useLocalStorage<"full" | "partial">("evals-suite", "full", {
serializer: (value: "full" | "partial") => value,
deserializer: (raw: string) => deserializeEnum(raw, SUITE_VALUES, "full"),
initializeWithValue: false,
})
const [savedExercises, setSavedExercises] = useLocalStorage<string[]>("evals-exercises", [], {
deserializer: deserializeStringArray,
initializeWithValue: false,
})

const [modelSelections, setModelSelections] = useState<ModelSelection[]>([
{ id: crypto.randomUUID(), model: "", popoverOpen: false },
])
Expand Down Expand Up @@ -188,66 +226,30 @@ export function NewRun() {
register("exercises")
}, [register])

// Load settings from localStorage on mount
// Sync persisted settings into the form/state (SSR-safe)
useEffect(() => {
const savedConcurrency = localStorage.getItem("evals-concurrency")

if (savedConcurrency) {
const parsed = parseInt(savedConcurrency, 10)

if (!isNaN(parsed) && parsed >= CONCURRENCY_MIN && parsed <= CONCURRENCY_MAX) {
setValue("concurrency", parsed)
}
if (savedConcurrency >= CONCURRENCY_MIN && savedConcurrency <= CONCURRENCY_MAX) {
setValue("concurrency", savedConcurrency)
}

const savedTimeout = localStorage.getItem("evals-timeout")

if (savedTimeout) {
const parsed = parseInt(savedTimeout, 10)

if (!isNaN(parsed) && parsed >= TIMEOUT_MIN && parsed <= TIMEOUT_MAX) {
setValue("timeout", parsed)
}
if (savedTimeout >= TIMEOUT_MIN && savedTimeout <= TIMEOUT_MAX) {
setValue("timeout", savedTimeout)
}

const savedCommandTimeout = localStorage.getItem("evals-command-execution-timeout")

if (savedCommandTimeout) {
const parsed = parseInt(savedCommandTimeout, 10)

if (!isNaN(parsed) && parsed >= 20 && parsed <= 60) {
setCommandExecutionTimeout(parsed)
}
if (savedCommandTimeout >= 20 && savedCommandTimeout <= 60) {
setCommandExecutionTimeout(savedCommandTimeout)
}

const savedShellTimeout = localStorage.getItem("evals-shell-integration-timeout")

if (savedShellTimeout) {
const parsed = parseInt(savedShellTimeout, 10)

if (!isNaN(parsed) && parsed >= 30 && parsed <= 60) {
setTerminalShellIntegrationTimeout(parsed)
}
if (savedShellTimeout >= 30 && savedShellTimeout <= 60) {
setTerminalShellIntegrationTimeout(savedShellTimeout)
}

const savedSuite = localStorage.getItem("evals-suite")

setValue("suite", savedSuite)
if (savedSuite === "partial") {
setValue("suite", "partial")
const savedExercises = localStorage.getItem("evals-exercises")
if (savedExercises) {
try {
const parsed = JSON.parse(savedExercises) as string[]
if (Array.isArray(parsed)) {
setSelectedExercises(parsed)
setValue("exercises", parsed)
}
} catch {
// Invalid JSON, ignore.
}
}
setSelectedExercises(savedExercises)
setValue("exercises", savedExercises)
} else {
setSelectedExercises([])
setValue("exercises", [])
}
}, [setValue])
}, [savedConcurrency, savedTimeout, savedCommandTimeout, savedShellTimeout, savedSuite, savedExercises, setValue])

// Track previous provider to detect switches
const [prevProvider, setPrevProvider] = useState(provider)
Expand Down Expand Up @@ -344,9 +346,9 @@ export function NewRun() {

setSelectedExercises(newSelected)
setValue("exercises", newSelected)
localStorage.setItem("evals-exercises", JSON.stringify(newSelected))
setSavedExercises(newSelected)
},
[getExercisesForLanguage, selectedExercises, setValue],
[getExercisesForLanguage, selectedExercises, setSavedExercises, setValue],
)

const isLanguageSelected = useCallback(
Expand Down Expand Up @@ -863,12 +865,13 @@ export function NewRun() {
<Tabs
value={suite}
onValueChange={(value) => {
setValue("suite", value as "full" | "partial")
localStorage.setItem("evals-suite", value)
if (value === "full") {
const next = value === "partial" ? "partial" : "full"
setValue("suite", next)
setSavedSuite(next)
if (next === "full") {
setSelectedExercises([])
setValue("exercises", [])
localStorage.removeItem("evals-exercises")
setSavedExercises([])
}
}}>
<TabsList>
Expand Down Expand Up @@ -905,7 +908,7 @@ export function NewRun() {
onValueChange={(value) => {
setSelectedExercises(value)
setValue("exercises", value)
localStorage.setItem("evals-exercises", JSON.stringify(value))
setSavedExercises(value)
}}
placeholder="Select"
variant="inverted"
Expand Down Expand Up @@ -934,7 +937,7 @@ export function NewRun() {
step={1}
onValueChange={(value) => {
field.onChange(value[0])
localStorage.setItem("evals-concurrency", String(value[0]))
setSavedConcurrency(value[0] ?? CONCURRENCY_DEFAULT)
}}
/>
<div className="w-6 text-right">{field.value}</div>
Expand All @@ -960,7 +963,7 @@ export function NewRun() {
step={1}
onValueChange={(value) => {
field.onChange(value[0])
localStorage.setItem("evals-timeout", String(value[0]))
setSavedTimeout(value[0] ?? TIMEOUT_DEFAULT)
}}
/>
<div className="w-6 text-right">{field.value}</div>
Expand Down Expand Up @@ -1024,7 +1027,7 @@ export function NewRun() {
onValueChange={([value]) => {
if (value !== undefined) {
setCommandExecutionTimeout(value)
localStorage.setItem("evals-command-execution-timeout", String(value))
setSavedCommandTimeout(value)
}
}}
/>
Expand Down Expand Up @@ -1056,7 +1059,7 @@ export function NewRun() {
onValueChange={([value]) => {
if (value !== undefined) {
setTerminalShellIntegrationTimeout(value)
localStorage.setItem("evals-shell-integration-timeout", String(value))
setSavedShellTimeout(value)
}
}}
/>
Expand Down
8 changes: 1 addition & 7 deletions apps/web-evals/src/components/home/run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,7 @@ import {
ScrollArea,
} from "@/components/ui"

// Tool group type (same as in runs.tsx)
type ToolGroup = {
id: string
name: string
icon: string
tools: string[]
}
import type { ToolGroup } from "@/lib/tool-groups"

type RunProps = {
run: EvalsRun
Expand Down
Loading
Loading