NVIDIA · rapids-bot · Jun 2, 2026 · May 29, 2026 · May 29, 2026 · May 29, 2026
@@ -15,6 +15,7 @@
 
 import logging
 import time
+import uuid
 from typing import Any
 
 import langsmith
@@ -53,11 +54,9 @@ def _humanize_dataset_name(name: str) -> str:
 def _span_id_to_langsmith_run_id(span_id: int) -> str:
     """Derive LangSmith run_id from OTEL span_id.
 
-    LangSmith deterministically maps OTEL span_ids to run UUIDs:
-    the first 8 bytes are zeroed, the last 8 bytes are the span_id.
+    LangSmith deterministically maps OTEL span_ids to run UUIDs.
     """
-    hex_str = format(span_id, "016x")
-    return f"00000000-0000-0000-{hex_str[:4]}-{hex_str[4:]}"
+    return str(uuid.UUID(int=span_id))  # UUID integer value = span_id; top 8 bytes stay zero for a 64-bit id
 
 
 def _eager_link_run_to_item(
@@ -535,9 +534,9 @@ def get_eval_project_name(self) -> str:
     def on_dataset_loaded(self, *, dataset_name: str, items: list) -> None:
         self._dataset_name = dataset_name
         pretty_name = _humanize_dataset_name(dataset_name)
-        ls_dataset_name = f"Benchmark Dataset ({pretty_name})"
+        ls_dataset_name = dataset_name
         try:
-            ds = self._client.create_dataset(dataset_name=ls_dataset_name, description="NAT eval dataset")
+            ds = self._client.create_dataset(dataset_name=ls_dataset_name, description=pretty_name)
             self._dataset_id = str(ds.id)
         except langsmith.utils.LangSmithConflictError:
             existing = self._client.read_dataset(dataset_name=ls_dataset_name)

@@ -350,6 +350,17 @@ def _humanize_param_name(param_name: str) -> str:
                 break
         return _humanize_dataset_name(name)
 
+    @staticmethod
+    def _clean_handle_part(value: str, fallback: str) -> str:
+        """Clean a prompt handle component for LangSmith prompt repos."""
+        slug = re.sub(r"[^a-z0-9_-]+", "-", value.lower())  # each run of disallowed characters -> one hyphen
+        slug = re.sub(r"-+", "-", slug).strip("-_")  # collapse hyphen runs, then trim edge "-" and "_"
+        if not slug:  # nothing usable survived cleaning
+            return fallback
+        if not slug[0].isalpha():  # leading non-letter (a digit at this point): prefix with the fallback
+            return f"{fallback}-{slug}"
+        return slug
+
     def _get_prompt_repo_name(self, param_name: str) -> str:
         """Get or create a unique prompt repo name for this optimization run.
 
@@ -365,10 +376,10 @@ def _get_prompt_repo_name(self, param_name: str) -> str:
             if param_slug.startswith(prefix):
                 param_slug = param_slug[len(prefix):]
                 break
-        param_slug = param_slug.lower().replace(".", "-").replace("_", "-")
+        param_slug = self._clean_handle_part(param_slug, fallback="prompt")
 
         # Prefix with project name
-        project_slug = (self._project.lower().replace(" ", "-").replace("_", "-"))
+        project_slug = self._clean_handle_part(self._project, fallback="project")
         base = f"{project_slug}-{param_slug}"
 
         pattern = re.compile(re.escape(base) + r"-run-(\d+)$")

@@ -64,6 +64,8 @@ def test_on_dataset_loaded_stores_example_ids(self, eval_cb):
 
     def test_on_dataset_loaded_reuses_existing_dataset_and_loads_examples(self, eval_cb):
         from nat.plugins.langchain.langsmith.langsmith_evaluation_callback import langsmith
+
+        dataset_name = "existing"
         self.mock_client.create_dataset.side_effect = langsmith.utils.LangSmithConflictError("exists")
         mock_existing = MagicMock()
         mock_existing.id = "ds-existing"
@@ -74,9 +76,9 @@ def test_on_dataset_loaded_reuses_existing_dataset_and_loads_examples(self, eval
         mock_ex.inputs = {"nat_item_id": "1", "question": "q"}
         self.mock_client.list_examples.return_value = [mock_ex]
         eval_cb.on_dataset_loaded(
-            dataset_name="existing",
+            dataset_name=dataset_name,
             items=[EvalInputItem(id=1, input_obj="q", expected_output_obj="a", full_dataset_entry={})])
-        self.mock_client.read_dataset.assert_called_once_with(dataset_name="Benchmark Dataset (Existing)")
+        self.mock_client.read_dataset.assert_called_once_with(dataset_name=dataset_name)
         self.mock_client.create_example.assert_not_called()
         # Should have loaded the existing example ID keyed by nat_item_id
         assert eval_cb._example_ids["1"] == "ex-existing"
@@ -427,6 +429,35 @@ def opt_cb(self):
         from nat.plugins.langchain.langsmith.langsmith_optimization_callback import LangSmithOptimizationCallback
         return LangSmithOptimizationCallback(project="test-proj")
 
+    @pytest.mark.parametrize(
+        ("param_name", "expected_repo_name"),
+        [
+            (
+                "functions.Agent.prompt.value",  # expected slug omits the leading "functions."; dots become hyphens
+                "project-123-project-name_with-spaces-agent-prompt-value-run-1",
+            ),
+            (
+                "llms.9-NIM.temperature",  # digit-leading slug: expected name carries the "prompt-" fallback prefix
+                "project-123-project-name_with-spaces-prompt-9-nim-temperature-run-1",
+            ),
+            (
+                "workflow.__",  # no usable characters survive cleaning: expected name uses plain "prompt"
+                "project-123-project-name_with-spaces-prompt-run-1",
+            ),
+            (
+                "custom/path with spaces",  # "/" and spaces are expected to become single hyphens
+                "project-123-project-name_with-spaces-custom-path-with-spaces-run-1",
+            ),
+        ],
+    )
+    def test_get_prompt_repo_name_cleans_langsmith_handle(self, opt_cb, param_name, expected_repo_name):
+        opt_cb._project = "123 Project.Name_With Spaces"  # digit-leading project: expected names start "project-"
+        self.mock_client.list_prompts.return_value.repos = []  # mocked client returns no existing prompt repos
+
+        repo_name = opt_cb._get_prompt_repo_name(param_name)
+
+        assert repo_name == expected_repo_name
+
     @patch("nat.plugins.langchain.langsmith.langsmith_evaluation_callback.time.sleep")
     def test_on_trial_end_links_otel_runs(self, _mock_sleep, opt_cb):
         # Simulate dataset already created