Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apps/web-evals/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
"tailwind-merge": "^3.3.0",
"tailwindcss-animate": "^1.0.7",
"vaul": "^1.1.2",
"usehooks-ts": "^3.1.0",
"zod": "^3.25.61"
},
"devDependencies": {
Expand Down
28 changes: 17 additions & 11 deletions apps/web-evals/src/app/runs/[id]/run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import { useMemo, useState, useCallback, useEffect, Fragment } from "react"
import { toast } from "sonner"
import { LoaderCircle, FileText, Copy, Check, StopCircle, List, Layers } from "lucide-react"
import { useLocalStorage } from "usehooks-ts"

import type { Run, TaskMetrics as _TaskMetrics, Task } from "@roo-code/evals"
import type { ToolName } from "@roo-code/types"
Expand Down Expand Up @@ -253,19 +254,24 @@ export function Run({ run }: { run: Run }) {
const [copied, setCopied] = useState(false)
const [showKillDialog, setShowKillDialog] = useState(false)
const [isKilling, setIsKilling] = useState(false)
const [groupByStatus, setGroupByStatus] = useState(() => {
// Initialize from localStorage if available (client-side only)
if (typeof window !== "undefined") {
const stored = localStorage.getItem("evals-group-by-status")
return stored === "true"

/**
 * Decodes a stored string into a boolean.
 *
 * The exact strings "true" and "false" are mapped directly. Any other input is passed
 * through JSON.parse and accepted only if the parsed value is itself a boolean.
 * Input that fails to parse, or parses to a non-boolean, yields false.
 *
 * @param value - Raw string to decode.
 * @returns The decoded boolean, or false when the input cannot be read as one.
 */
function deserializeBoolean(value: string): boolean {
// Support both raw-string storage and default `useLocalStorage` JSON serialization.
if (value === "true") return true
if (value === "false") return false
try {
// Typed as `unknown` so the parsed value has to be narrowed before it is returned.
const parsed: unknown = JSON.parse(value)
return typeof parsed === "boolean" ? parsed : false
} catch {
// JSON.parse throws on malformed input; fall back to false instead of propagating.
return false
}
// NOTE(review): the next two lines look like removed-side diff residue from the previous
// `useState(() => { ... })` initializer rather than part of this function — confirm.
return false
})
}

// Persist groupByStatus to localStorage
useEffect(() => {
localStorage.setItem("evals-group-by-status", String(groupByStatus))
}, [groupByStatus])
const [groupByStatus, setGroupByStatus] = useLocalStorage<boolean>("evals-group-by-status", false, {
serializer: (value: boolean) => String(value),
deserializer: deserializeBoolean,
initializeWithValue: false,
})

// Determine if run is still active (has heartbeat or runners)
const isRunActive = !run.taskMetricsId && (!!heartbeat || (runners && runners.length > 0))
Expand Down
127 changes: 64 additions & 63 deletions apps/web-evals/src/app/runs/new/new-run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { useQuery } from "@tanstack/react-query"
import { useForm, FormProvider } from "react-hook-form"
import { zodResolver } from "@hookform/resolvers/zod"
import { toast } from "sonner"
import { useLocalStorage } from "usehooks-ts"
import {
X,
Rocket,
Expand Down Expand Up @@ -47,6 +48,7 @@ import {
ITERATIONS_DEFAULT,
} from "@/lib/schemas"
import { cn } from "@/lib/utils"
import { deserializeNumber, deserializeString, deserializeStringArray } from "@/lib/storage"

import { loadRooLastModelSelection, saveRooLastModelSelection } from "@/lib/roo-last-model-selection"
import { normalizeCreateRunForSubmit } from "@/lib/normalize-create-run"
Expand Down Expand Up @@ -115,6 +117,40 @@ export function NewRun() {
const [commandExecutionTimeout, setCommandExecutionTimeout] = useState(20)
const [terminalShellIntegrationTimeout, setTerminalShellIntegrationTimeout] = useState(30) // seconds

const [savedConcurrency, setSavedConcurrency] = useLocalStorage<number>("evals-concurrency", CONCURRENCY_DEFAULT, {
serializer: (value: number) => String(value),
deserializer: (raw: string) => deserializeNumber(raw) ?? CONCURRENCY_DEFAULT,
initializeWithValue: false,
})
const [savedTimeout, setSavedTimeout] = useLocalStorage<number>("evals-timeout", TIMEOUT_DEFAULT, {
serializer: (value: number) => String(value),
deserializer: (raw: string) => deserializeNumber(raw) ?? TIMEOUT_DEFAULT,
initializeWithValue: false,
})
const [savedCommandTimeout, setSavedCommandTimeout] = useLocalStorage<number>(
"evals-command-execution-timeout",
20,
{
serializer: (value: number) => String(value),
deserializer: (raw: string) => deserializeNumber(raw) ?? 20,
initializeWithValue: false,
},
)
const [savedShellTimeout, setSavedShellTimeout] = useLocalStorage<number>("evals-shell-integration-timeout", 30, {
serializer: (value: number) => String(value),
deserializer: (raw: string) => deserializeNumber(raw) ?? 30,
initializeWithValue: false,
})
const [savedSuite, setSavedSuite] = useLocalStorage<"full" | "partial">("evals-suite", "full", {
serializer: (value: "full" | "partial") => value,
deserializer: (raw: string) => (deserializeString(raw) === "partial" ? "partial" : "full"),
initializeWithValue: false,
})
const [savedExercises, setSavedExercises] = useLocalStorage<string[]>("evals-exercises", [], {
deserializer: deserializeStringArray,
initializeWithValue: false,
})

const [modelSelections, setModelSelections] = useState<ModelSelection[]>([
{ id: crypto.randomUUID(), model: "", popoverOpen: false },
])
Expand Down Expand Up @@ -188,66 +224,30 @@ export function NewRun() {
register("exercises")
}, [register])

// Load settings from localStorage on mount
// Sync persisted settings into the form/state (SSR-safe)
useEffect(() => {
const savedConcurrency = localStorage.getItem("evals-concurrency")

if (savedConcurrency) {
const parsed = parseInt(savedConcurrency, 10)

if (!isNaN(parsed) && parsed >= CONCURRENCY_MIN && parsed <= CONCURRENCY_MAX) {
setValue("concurrency", parsed)
}
if (savedConcurrency >= CONCURRENCY_MIN && savedConcurrency <= CONCURRENCY_MAX) {
setValue("concurrency", savedConcurrency)
}

const savedTimeout = localStorage.getItem("evals-timeout")

if (savedTimeout) {
const parsed = parseInt(savedTimeout, 10)

if (!isNaN(parsed) && parsed >= TIMEOUT_MIN && parsed <= TIMEOUT_MAX) {
setValue("timeout", parsed)
}
if (savedTimeout >= TIMEOUT_MIN && savedTimeout <= TIMEOUT_MAX) {
setValue("timeout", savedTimeout)
}

const savedCommandTimeout = localStorage.getItem("evals-command-execution-timeout")

if (savedCommandTimeout) {
const parsed = parseInt(savedCommandTimeout, 10)

if (!isNaN(parsed) && parsed >= 20 && parsed <= 60) {
setCommandExecutionTimeout(parsed)
}
if (savedCommandTimeout >= 20 && savedCommandTimeout <= 60) {
setCommandExecutionTimeout(savedCommandTimeout)
}

const savedShellTimeout = localStorage.getItem("evals-shell-integration-timeout")

if (savedShellTimeout) {
const parsed = parseInt(savedShellTimeout, 10)

if (!isNaN(parsed) && parsed >= 30 && parsed <= 60) {
setTerminalShellIntegrationTimeout(parsed)
}
if (savedShellTimeout >= 30 && savedShellTimeout <= 60) {
setTerminalShellIntegrationTimeout(savedShellTimeout)
}

const savedSuite = localStorage.getItem("evals-suite")

setValue("suite", savedSuite)
if (savedSuite === "partial") {
setValue("suite", "partial")
const savedExercises = localStorage.getItem("evals-exercises")
if (savedExercises) {
try {
const parsed = JSON.parse(savedExercises) as string[]
if (Array.isArray(parsed)) {
setSelectedExercises(parsed)
setValue("exercises", parsed)
}
} catch {
// Invalid JSON, ignore.
}
}
setSelectedExercises(savedExercises)
setValue("exercises", savedExercises)
} else {
setSelectedExercises([])
setValue("exercises", [])
}
}, [setValue])
}, [savedConcurrency, savedTimeout, savedCommandTimeout, savedShellTimeout, savedSuite, savedExercises, setValue])

// Track previous provider to detect switches
const [prevProvider, setPrevProvider] = useState(provider)
Expand Down Expand Up @@ -344,9 +344,9 @@ export function NewRun() {

setSelectedExercises(newSelected)
setValue("exercises", newSelected)
localStorage.setItem("evals-exercises", JSON.stringify(newSelected))
setSavedExercises(newSelected)
},
[getExercisesForLanguage, selectedExercises, setValue],
[getExercisesForLanguage, selectedExercises, setSavedExercises, setValue],
)

const isLanguageSelected = useCallback(
Expand Down Expand Up @@ -863,12 +863,13 @@ export function NewRun() {
<Tabs
value={suite}
onValueChange={(value) => {
setValue("suite", value as "full" | "partial")
localStorage.setItem("evals-suite", value)
if (value === "full") {
const next = value === "partial" ? "partial" : "full"
setValue("suite", next)
setSavedSuite(next)
if (next === "full") {
setSelectedExercises([])
setValue("exercises", [])
localStorage.removeItem("evals-exercises")
setSavedExercises([])
}
}}>
<TabsList>
Expand Down Expand Up @@ -905,7 +906,7 @@ export function NewRun() {
onValueChange={(value) => {
setSelectedExercises(value)
setValue("exercises", value)
localStorage.setItem("evals-exercises", JSON.stringify(value))
setSavedExercises(value)
}}
placeholder="Select"
variant="inverted"
Expand Down Expand Up @@ -934,7 +935,7 @@ export function NewRun() {
step={1}
onValueChange={(value) => {
field.onChange(value[0])
localStorage.setItem("evals-concurrency", String(value[0]))
setSavedConcurrency(value[0] ?? CONCURRENCY_DEFAULT)
}}
/>
<div className="w-6 text-right">{field.value}</div>
Expand All @@ -960,7 +961,7 @@ export function NewRun() {
step={1}
onValueChange={(value) => {
field.onChange(value[0])
localStorage.setItem("evals-timeout", String(value[0]))
setSavedTimeout(value[0] ?? TIMEOUT_DEFAULT)
}}
/>
<div className="w-6 text-right">{field.value}</div>
Expand Down Expand Up @@ -1024,7 +1025,7 @@ export function NewRun() {
onValueChange={([value]) => {
if (value !== undefined) {
setCommandExecutionTimeout(value)
localStorage.setItem("evals-command-execution-timeout", String(value))
setSavedCommandTimeout(value)
}
}}
/>
Expand Down Expand Up @@ -1056,7 +1057,7 @@ export function NewRun() {
onValueChange={([value]) => {
if (value !== undefined) {
setTerminalShellIntegrationTimeout(value)
localStorage.setItem("evals-shell-integration-timeout", String(value))
setSavedShellTimeout(value)
}
}}
/>
Expand Down
8 changes: 1 addition & 7 deletions apps/web-evals/src/components/home/run.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,7 @@ import {
ScrollArea,
} from "@/components/ui"

// Tool group type (same as in runs.tsx)
// NOTE(review): a type with the same name is imported from "@/lib/tool-groups" immediately
// after this declaration; this local copy looks like the removed side of the diff —
// confirm that only one of the two remains in the file.
type ToolGroup = {
id: string
name: string
icon: string
tools: string[]
}
import type { ToolGroup } from "@/lib/tool-groups"

type RunProps = {
run: EvalsRun
Expand Down
Loading
Loading