Skip to content
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import logging
import time
import uuid
from typing import Any

import langsmith
Expand Down Expand Up @@ -53,11 +54,9 @@ def _humanize_dataset_name(name: str) -> str:
def _span_id_to_langsmith_run_id(span_id: int) -> str:
"""Derive LangSmith run_id from OTEL span_id.

LangSmith deterministically maps OTEL span_ids to run UUIDs:
the first 8 bytes are zeroed, the last 8 bytes are the span_id.
LangSmith deterministically maps OTEL span_ids to run UUIDs.
"""
hex_str = format(span_id, "016x")
return f"00000000-0000-0000-{hex_str[:4]}-{hex_str[4:]}"
return str(uuid.UUID(int=span_id))
Comment thread
dagardner-nv marked this conversation as resolved.


def _eager_link_run_to_item(
Expand Down Expand Up @@ -535,9 +534,9 @@ def get_eval_project_name(self) -> str:
def on_dataset_loaded(self, *, dataset_name: str, items: list) -> None:
self._dataset_name = dataset_name
pretty_name = _humanize_dataset_name(dataset_name)
ls_dataset_name = f"Benchmark Dataset ({pretty_name})"
ls_dataset_name = dataset_name
try:
ds = self._client.create_dataset(dataset_name=ls_dataset_name, description="NAT eval dataset")
ds = self._client.create_dataset(dataset_name=ls_dataset_name, description=pretty_name)
self._dataset_id = str(ds.id)
except langsmith.utils.LangSmithConflictError:
existing = self._client.read_dataset(dataset_name=ls_dataset_name)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,17 @@ def _humanize_param_name(param_name: str) -> str:
break
return _humanize_dataset_name(name)

@staticmethod
def _clean_handle_part(value: str, fallback: str) -> str:
"""Clean a prompt handle component for LangSmith prompt repos."""
slug = re.sub(r"[^a-z0-9_-]+", "-", value.lower())
slug = re.sub(r"-+", "-", slug).strip("-_")
if not slug:
return fallback
if not slug[0].isalpha():
return f"{fallback}-{slug}"
return slug

def _get_prompt_repo_name(self, param_name: str) -> str:
"""Get or create a unique prompt repo name for this optimization run.

Expand All @@ -365,10 +376,10 @@ def _get_prompt_repo_name(self, param_name: str) -> str:
if param_slug.startswith(prefix):
param_slug = param_slug[len(prefix):]
break
param_slug = param_slug.lower().replace(".", "-").replace("_", "-")
param_slug = self._clean_handle_part(param_slug, fallback="prompt")

# Prefix with project name
project_slug = (self._project.lower().replace(" ", "-").replace("_", "-"))
project_slug = self._clean_handle_part(self._project, fallback="project")
base = f"{project_slug}-{param_slug}"

pattern = re.compile(re.escape(base) + r"-run-(\d+)$")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ def test_on_dataset_loaded_stores_example_ids(self, eval_cb):

def test_on_dataset_loaded_reuses_existing_dataset_and_loads_examples(self, eval_cb):
from nat.plugins.langchain.langsmith.langsmith_evaluation_callback import langsmith

dataset_name = "existing"
self.mock_client.create_dataset.side_effect = langsmith.utils.LangSmithConflictError("exists")
mock_existing = MagicMock()
mock_existing.id = "ds-existing"
Expand All @@ -74,9 +76,9 @@ def test_on_dataset_loaded_reuses_existing_dataset_and_loads_examples(self, eval
mock_ex.inputs = {"nat_item_id": "1", "question": "q"}
self.mock_client.list_examples.return_value = [mock_ex]
eval_cb.on_dataset_loaded(
dataset_name="existing",
dataset_name=dataset_name,
items=[EvalInputItem(id=1, input_obj="q", expected_output_obj="a", full_dataset_entry={})])
self.mock_client.read_dataset.assert_called_once_with(dataset_name="Benchmark Dataset (Existing)")
self.mock_client.read_dataset.assert_called_once_with(dataset_name=dataset_name)
self.mock_client.create_example.assert_not_called()
# Should have loaded the existing example ID keyed by nat_item_id
assert eval_cb._example_ids["1"] == "ex-existing"
Expand Down Expand Up @@ -427,6 +429,35 @@ def opt_cb(self):
from nat.plugins.langchain.langsmith.langsmith_optimization_callback import LangSmithOptimizationCallback
return LangSmithOptimizationCallback(project="test-proj")

@pytest.mark.parametrize(
    ("param_name", "expected_repo_name"),
    [
        ("functions.Agent.prompt.value", "project-123-project-name_with-spaces-agent-prompt-value-run-1"),
        ("llms.9-NIM.temperature", "project-123-project-name_with-spaces-prompt-9-nim-temperature-run-1"),
        ("workflow.__", "project-123-project-name_with-spaces-prompt-run-1"),
        ("custom/path with spaces", "project-123-project-name_with-spaces-custom-path-with-spaces-run-1"),
    ],
)
def test_get_prompt_repo_name_cleans_langsmith_handle(self, opt_cb, param_name, expected_repo_name):
    """Check the repo name built for each parametrized ``param_name``.

    The project name is set to a value containing a leading digit, a dot,
    an underscore and spaces, and the mocked client reports an empty list
    of prompt repos.
    """
    # Arrange: awkward project name and a client that lists no repos.
    opt_cb._project = "123 Project.Name_With Spaces"
    self.mock_client.list_prompts.return_value.repos = []

    # Act
    actual = opt_cb._get_prompt_repo_name(param_name)

    # Assert
    assert actual == expected_repo_name

@patch("nat.plugins.langchain.langsmith.langsmith_evaluation_callback.time.sleep")
def test_on_trial_end_links_otel_runs(self, _mock_sleep, opt_cb):
# Simulate dataset already created
Expand Down
Loading
Loading