Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ tests/evals/js/eval-bun/test-data.txt
__pycache__

bt-sync
*.env
15 changes: 15 additions & 0 deletions scripts/eval-runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,21 @@ def load_evaluators(files: list[str]) -> tuple[list[EvaluatorInstance], dict[str
cwd = os.getcwd()
if cwd not in sys.path:
sys.path.insert(0, cwd)

# Add the project root inferred from input files to sys.path so that
# sibling-package imports work when files live outside CWD (e.g.
# sandbox bundles extracted to a temp directory). Walk up from each
# file's directory looking for a register.py (bundle marker) or the
# filesystem root, whichever comes first.
for f in files:
d = os.path.dirname(os.path.abspath(f))
while d and d != os.path.dirname(d):
if os.path.isfile(os.path.join(d, "register.py")):
if d not in sys.path:
sys.path.insert(0, d)
break
d = os.path.dirname(d)

unique_files: set[str] = set()
for file_path in files:
for candidate in collect_files(file_path):
Expand Down
5 changes: 4 additions & 1 deletion scripts/functions-bundler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,10 @@ async function main(): Promise<void> {
const externalPackages = parseExternalPackages(
process.env.BT_FUNCTIONS_PUSH_EXTERNAL_PACKAGES,
);
const external = buildExternalPackagePatterns(externalPackages);
const selfContained = process.env.BT_FUNCTIONS_PUSH_SELF_CONTAINED === "1";
const external = selfContained
? ["fsevents", "chokidar"]
: buildExternalPackagePatterns(externalPackages);
const tsconfig = loadTsconfigPath();

const outputDir = path.dirname(outputFile);
Expand Down
140 changes: 130 additions & 10 deletions scripts/functions-runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import inspect
import json
import os
import re
import sys
from contextlib import nullcontext
from typing import Any
Expand All @@ -28,9 +29,9 @@ def to_json_value(value: Any) -> Any:
return [to_json_value(item) for item in value]
if isinstance(value, dict):
return {str(key): to_json_value(val) for key, val in value.items()}
if hasattr(value, "model_dump"):
if hasattr(value, "model_dump") and not isinstance(value, type):
return to_json_value(value.model_dump())
if hasattr(value, "dict"):
if hasattr(value, "dict") and not isinstance(value, type):
return to_json_value(value.dict())
if hasattr(value, "__dict__"):
result: dict[str, Any] = {}
Expand All @@ -42,21 +43,31 @@ def to_json_value(value: Any) -> Any:
return str(value)


def load_framework_globals() -> tuple[Any, Any, Any]:
def load_framework_globals() -> tuple[Any, Any, Any, Any]:
# Prefer current SDK layout first:
# - braintrust.framework2 exposes module-level `global_`
# - braintrust.framework exposes `_set_lazy_load`
try:
from braintrust.framework import _set_lazy_load as lazy
from braintrust.framework2 import global_ as global_state

return global_state.functions, global_state.prompts, lazy
try:
from braintrust.framework import _evals
except (ImportError, ModuleNotFoundError):
_evals = None

return global_state.functions, global_state.prompts, lazy, _evals
except (ImportError, ModuleNotFoundError):
# Backward compatibility with older SDK layout.
from braintrust.framework2.global_ import functions, prompts
from braintrust.framework2.lazy_load import _set_lazy_load as lazy

return functions, prompts, lazy
try:
from braintrust.framework import _evals
except (ImportError, ModuleNotFoundError):
_evals = None

return functions, prompts, lazy, _evals


def normalize_project_selector(project: Any) -> tuple[str | None, str | None]:
Expand Down Expand Up @@ -277,16 +288,113 @@ async def collect_function_event_entries(prompts_registry: Any) -> list[dict[str
return entries


def slugify(text: str) -> str:
    """Turn *text* into a lowercase, dash-separated slug.

    The text is lowercased, every run of characters outside ``a-z0-9`` is
    collapsed into a single ``-``, and one leading and/or trailing ``-`` is
    then removed.
    """
    dashed = re.sub(r"[^a-z0-9]+", "-", text.lower())
    # After collapsing, at most one dash can remain at either end.
    return re.sub(r"^-|-$", "", dashed)


def _serialize_evaluator_parameters(raw_params: Any) -> Any:
    """Build the ``parameters`` payload for an evaluator definition.

    Objects whose ``__braintrust_parameters_marker`` attribute is ``True`` are
    described as a ``braintrust.parameters`` reference built from their
    ``schema``/``id``/``slug``/``name``/``projectId``/``version`` attributes.
    Anything else is converted with the SDK's ``parameters_to_json_schema``
    when it can be imported and succeeds, falling back to ``to_json_value``.
    """
    if getattr(raw_params, "__braintrust_parameters_marker", None) is True:
        return {
            "type": "braintrust.parameters",
            "schema": getattr(raw_params, "schema", None),
            "source": {
                "parametersId": getattr(raw_params, "id", None),
                "slug": getattr(raw_params, "slug", None),
                "name": getattr(raw_params, "name", None),
                "projectId": getattr(raw_params, "projectId", None),
                "version": getattr(raw_params, "version", None),
            },
        }

    # Prefer the braintrust SDK converter so that Pydantic model classes
    # become proper staticParametersSchema entries (type: "data" with a JSON
    # Schema) that the UI can parse; any failure falls back to the generic
    # JSON conversion.
    try:
        from braintrust.parameters import parameters_to_json_schema

        return parameters_to_json_schema(raw_params)
    except Exception:
        return to_json_value(raw_params)


def collect_evaluator_entries(evals_registry: Any, source_file: str) -> list[dict[str, Any]]:
    """Create one sandbox manifest entry per evaluator in *evals_registry*.

    Args:
        evals_registry: Object expected to expose an ``evaluators`` dict that
            maps eval names to instances carrying an ``evaluator`` attribute.
            ``None``, a missing/empty ``evaluators`` attribute, or a non-dict
            value yields an empty result.
        source_file: Path of the file the evaluators were loaded from. Its
            basename (minus extension and a trailing ``.eval``) is used for
            slugs and the sandbox group name.

    Returns:
        A list of ``kind: "code"`` entries with ``function_type: "sandbox"``.
        Registry values that are ``None`` or lack an ``evaluator`` are skipped.
    """
    if evals_registry is None:
        return []

    registered = getattr(evals_registry, "evaluators", None)
    if not (registered and isinstance(registered, dict)):
        return []

    file_stem = os.path.splitext(os.path.basename(source_file))[0]
    stem = re.sub(r"\.eval$", "", file_stem)

    entries: list[dict[str, Any]] = []
    for eval_name, instance in registered.items():
        evaluator = None if instance is None else getattr(instance, "evaluator", None)
        if evaluator is None:
            continue

        project_name = getattr(evaluator, "project_name", None)
        selector = {"project_name": project_name} if isinstance(project_name, str) else None
        project_id, proj_name = normalize_project_selector(selector)

        scorers = getattr(evaluator, "scores", []) or []
        evaluator_definition: dict[str, Any] = {
            "scores": [
                {"name": getattr(scorer, "__name__", f"scorer_{position}")}
                for position, scorer in enumerate(scorers)
            ]
        }

        raw_params = getattr(evaluator, "parameters", None)
        if raw_params is not None:
            parameters_payload = _serialize_evaluator_parameters(raw_params)
            if parameters_payload is not None:
                evaluator_definition["parameters"] = parameters_payload

        # Sandbox entry only — task and scorer entries are pushed separately
        # when the eval is actually run, matching the Python SDK behavior.
        sandbox_entry: dict[str, Any] = {"kind": "code"}
        if project_id:
            sandbox_entry["project_id"] = project_id
        if proj_name:
            sandbox_entry["project_name"] = proj_name
        sandbox_entry["name"] = f"Eval {eval_name} sandbox"
        sandbox_entry["slug"] = slugify(f"{stem}-{eval_name}-sandbox")
        sandbox_entry["function_type"] = "sandbox"
        sandbox_entry["location"] = {
            "type": "sandbox",
            "sandbox_spec": {"provider": "lambda"},
            "entrypoints": [os.path.relpath(source_file)],
            "eval_name": eval_name,
            "evaluator_definition": evaluator_definition,
        }
        sandbox_entry["metadata"] = {"_bt_sandbox_group_name": stem}
        entries.append(sandbox_entry)

    return entries


async def process_file(file_path: str) -> dict[str, Any]:
abs_path = os.path.abspath(file_path)
cwd = os.getcwd()
if cwd not in sys.path:
sys.path.insert(0, cwd)

purge_local_modules(cwd, preserve_modules={__name__, "python_runner_common"})
functions_registry, prompts_registry, lazy_loader = load_framework_globals()
functions_registry, prompts_registry, lazy_loader, evals_registry = load_framework_globals()
clear_registry(functions_registry)
clear_registry(prompts_registry)
if (
evals_registry is not None
and hasattr(evals_registry, "evaluators")
and isinstance(evals_registry.evaluators, dict)
):
evals_registry.evaluators.clear()
purge_local_modules(cwd, preserve_modules={__name__, "python_runner_common"})

module_name = import_module_name_from_cwd(cwd, abs_path)
if module_name is None:
Expand All @@ -298,12 +406,13 @@ async def process_file(file_path: str) -> dict[str, Any]:
import_file(module_name, abs_path, extra_paths)
code_entries = collect_code_entries(functions_registry)
event_entries = await collect_function_event_entries(prompts_registry)
entries = [*code_entries, *event_entries]
evaluator_entries = collect_evaluator_entries(evals_registry, abs_path)
entries = [*code_entries, *event_entries, *evaluator_entries]
file_manifest: dict[str, Any] = {
"source_file": abs_path,
"entries": entries,
}
if code_entries:
if code_entries or evaluator_entries:
runner_root = os.path.dirname(os.path.abspath(__file__))
project_root = os.path.abspath(cwd)
path_rest: list[str] = []
Expand Down Expand Up @@ -350,13 +459,24 @@ async def process_file(file_path: str) -> dict[str, Any]:
continue
seen_sources.add(init_source)
bundled_sources.append(init_source)
# Compute entry_module as a CWD-relative dotted path so that the
# archive root inferred by push.rs walks back to CWD, matching
# the Python SDK behavior and allowing sibling-package imports.
rel_path = os.path.relpath(abs_path, cwd)
archive_module = re.sub(r"\.py$", "", rel_path).replace("-", "_").replace(os.sep, ".")
file_manifest["python_bundle"] = {
"entry_module": module_name,
"entry_module": archive_module,
"sources": bundled_sources,
}

clear_registry(functions_registry)
clear_registry(prompts_registry)
if (
evals_registry is not None
and hasattr(evals_registry, "evaluators")
and isinstance(evals_registry.evaluators, dict)
):
evals_registry.evaluators.clear()
return file_manifest


Expand Down
112 changes: 112 additions & 0 deletions scripts/functions-runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,20 @@ type Manifest = {
files: ManifestFile[];
};

/**
 * Convert `input` into a lowercase, dash-separated slug: runs of characters
 * outside `a-z0-9` collapse to a single `-`, and a leading and/or trailing
 * `-` is removed.
 */
function slugify(input: string): string {
  const lowered = input.toLowerCase();
  const dashed = lowered.replace(/[^a-z0-9]+/g, "-");
  // After collapsing, at most one dash can remain at either end.
  return dashed.replace(/^-|-$/g, "");
}

/**
 * Return the display name for a scorer: the function's own non-empty `name`
 * when `score` is a function, otherwise the positional fallback
 * `scorer_<idx>`.
 */
function extractScoreName(score: unknown, idx: number): string {
  const fallback = `scorer_${idx}`;
  if (typeof score !== "function") {
    return fallback;
  }
  const declared = score.name;
  // Anonymous functions report an empty name; use the fallback for those too.
  return typeof declared === "string" && declared ? declared : fallback;
}

type EvalRegistry = NonNullable<typeof globalThis._evals>;
type ZodToJsonSchemaFn = (schema: unknown) => unknown;
type ZodV4ToJsonSchemaFn = (
Expand Down Expand Up @@ -558,6 +572,100 @@ function collectCodeEntries(items: CodeRegistryItem[]): CodeEntry[] {
return entries;
}

function collectEvaluatorEntries(
evaluators: Record<string, unknown>,
sourceFilePath: string,
): CodeEntry[] {
const entries: CodeEntry[] = [];
const ext = path.extname(sourceFilePath);
const stem = path.basename(sourceFilePath, ext).replace(/\.eval$/, "");

for (const [evalName, entry] of Object.entries(evaluators)) {
if (!entry || typeof entry !== "object") {
continue;
}

const evaluator = (entry as Record<string, unknown>).evaluator;
if (!evaluator || typeof evaluator !== "object") {
continue;
}

const evalObj = evaluator as Record<string, unknown>;
const projectName =
typeof evalObj.project_name === "string"
? evalObj.project_name
: undefined;
const scores = Array.isArray(evalObj.scores) ? evalObj.scores : [];

const selector = asProjectSelector(
typeof projectName === "string" ? { name: projectName } : undefined,
);
const projectId =
typeof selector.project_id === "string" ? selector.project_id : undefined;
const selectorProjectName =
typeof selector.project_name === "string"
? selector.project_name
: undefined;

const scoreDescriptors = scores.map((s: unknown, i: number) => ({
name: extractScoreName(s, i),
}));

const evaluatorDefinition: JsonObject = {
scores: scoreDescriptors as JsonValue,
};

const rawParams = evalObj.parameters;
if (rawParams !== undefined && rawParams !== null) {
const marker =
rawParams !== null &&
typeof rawParams === "object" &&
(rawParams as Record<string, unknown>)
.__braintrust_parameters_marker === true;
if (marker) {
const paramObj = rawParams as Record<string, unknown>;
evaluatorDefinition.parameters = toJsonValue({
type: "braintrust.parameters",
schema: paramObj.schema,
source: {
parametersId: paramObj.id,
slug: paramObj.slug,
name: paramObj.name,
projectId: paramObj.projectId,
version: paramObj.version,
},
} as JsonValue);
} else {
const serialized = toJsonValue(rawParams as JsonValue);
if (serialized !== undefined) {
evaluatorDefinition.parameters = serialized;
}
}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we reuse the existing evaluator-parameter serialization flow from eval-runner.ts here, instead of the direct toJsonValue(rawParams) path? I think the shared serializer covers some Zod -> JSON Schema conversion cases, which helps avoid a malformed evaluator_definition.parameters for parameterized evals.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried extracting this into a new file, but the eval-runner doesn't play nicely with local exports, since it needs to run with multiple runner types in a way the functions-runner doesn't. I'm going to just copy the logic over from eval-runner. I'm a bit hesitant to move to a complex local file structure for these runners since they need to work in so many situations, but let me know if you think that's the wrong approach.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit hesitant to move to a complex local file structure for these runners since they need to work in so many situations, but let me know if you think that's the wrong approach

Nope, totally agree with this.

}

// Sandbox entry only — task and scorer entries are pushed separately
// when the eval is actually run, matching the Python SDK behavior.
entries.push({
kind: "code",
project_id: projectId,
project_name: selectorProjectName,
name: `Eval ${evalName} sandbox`,
slug: slugify(`${stem}-${evalName}-sandbox`),
function_type: "sandbox",
location: {
type: "sandbox",
sandbox_spec: { provider: "lambda" },
entrypoints: [path.relative(process.cwd(), sourceFilePath)],
eval_name: evalName,
evaluator_definition: evaluatorDefinition as JsonValue,
} as JsonValue,
metadata: { _bt_sandbox_group_name: stem },
});
}

return entries;
}

async function processFile(filePath: string): Promise<ManifestFile> {
const absolutePath = path.resolve(process.cwd(), filePath);
const fallbackRegistry = freshRegistry();
Expand All @@ -577,6 +685,10 @@ async function processFile(filePath: string): Promise<ManifestFile> {
registry.parameters as EventRegistryItem[],
false,
)),
...collectEvaluatorEntries(
registry.evaluators as Record<string, unknown>,
absolutePath,
),
];

return {
Expand Down
Loading
Loading