feat(langchain): add debug level to langsmith:hidden tagged spans (#1077)
hassiebp authored Jan 15, 2025
1 parent f9eefc1 commit 6c74dc3
Showing 6 changed files with 227 additions and 7 deletions.
6 changes: 6 additions & 0 deletions langfuse/callback/langchain.py
@@ -51,6 +51,8 @@
        "Please install langchain to use the Langfuse langchain integration: 'pip install langchain'"
    )

LANGSMITH_TAG_HIDDEN: str = "langsmith:hidden"


class LangchainCallbackHandler(
    LangchainBaseCallbackHandler, LangfuseBaseCallbackHandler
@@ -260,6 +262,7 @@ def on_chain_start(
            "metadata": self.__join_tags_and_metadata(tags, metadata),
            "input": inputs,
            "version": self.version,
            "level": "DEBUG" if tags and LANGSMITH_TAG_HIDDEN in tags else None,
        }
        if parent_run_id is None:
            if self.root_span is None:
@@ -581,6 +584,7 @@ def on_tool_start(
                input=input_str,
                metadata=meta,
                version=self.version,
                level="DEBUG" if tags and LANGSMITH_TAG_HIDDEN in tags else None,
            )
            self.next_span_id = None
        except Exception as e:
@@ -619,6 +623,7 @@ def on_retriever_start(
                "metadata": self.__join_tags_and_metadata(tags, metadata),
                "input": query,
                "version": self.version,
                "level": "DEBUG" if tags and LANGSMITH_TAG_HIDDEN in tags else None,
            }
            if self.root_span is None:
                self.runs[run_id] = self.trace.span(**content)
@@ -631,6 +636,7 @@ def on_retriever_start(
                    input=query,
                    metadata=self.__join_tags_and_metadata(tags, metadata),
                    version=self.version,
                    level="DEBUG" if tags and LANGSMITH_TAG_HIDDEN in tags else None,
                )
            self.next_span_id = None
        except Exception as e:
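Taken together, the four hunks above demote any Langchain run tagged langsmith:hidden to the DEBUG observation level. A minimal caller-side sketch of the intended effect (hypothetical chain, assuming a configured Langfuse project; tags passed in the Runnable config propagate to the run the handler sees):

from langchain_core.runnables import RunnableLambda

from langfuse.callback import CallbackHandler
from langfuse.callback.langchain import LANGSMITH_TAG_HIDDEN

handler = CallbackHandler()
chain = RunnableLambda(lambda x: x.upper())

# The resulting observation should be created with level="DEBUG" instead of
# the default, so UIs that filter on level can hide it.
chain.invoke("hello", config={"callbacks": [handler], "tags": [LANGSMITH_TAG_HIDDEN]})
handler.flush()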
123 changes: 121 additions & 2 deletions poetry.lock

Some generated files are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
@@ -52,8 +52,9 @@ langchain-aws = ">=0.1.3,<0.3"
langchain-ollama = "^0.2.0"
langchain-cohere = "^0.3.3"
langchain-huggingface = "^0.1.2"

langchain-community = ">=0.2.14,<0.4"
langgraph = "^0.2.62"

[tool.poetry.group.docs.dependencies]
pdoc = "^14.4.0"

3 changes: 3 additions & 0 deletions tests/test_core_sdk.py
@@ -1467,12 +1467,15 @@ def mask_func(data):
    api_wrapper = LangfuseAPI()

    trace = langfuse.trace(name="test_trace", input={"sensitive": "data"})
    sleep(0.1)
    trace.update(output={"more": "sensitive"})

    gen = trace.generation(name="test_gen", input={"prompt": "secret"})
    sleep(0.1)
    gen.update(output="new_confidential")

    span = trace.span(name="test_span", input={"data": "private"})
    sleep(0.1)
    span.update(output="new_classified")

    langfuse.flush()
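A plausible reading of the added sleeps (an assumption, not stated in the commit): the create and update events for the same object need distinct timestamps so the update is merged deterministically after creation. The pattern, isolated:

obj = langfuse.trace(name="t", input={"sensitive": "data"})
sleep(0.1)  # ensure the update event is strictly later than the create event
obj.update(output={"more": "sensitive"})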
2 changes: 0 additions & 2 deletions tests/test_datasets.py
@@ -453,8 +453,6 @@ def test_llama_index_dataset():
        "dataset_id": dataset.id,
    }

    assert sorted_observations[0].name == "query"


def sorted_dependencies(
    observations: List[Observation],
97 changes: 95 additions & 2 deletions tests/test_langchain.py
@@ -3,7 +3,7 @@
import string
import time
from time import sleep
from typing import Any, Dict, List, Mapping, Optional
from typing import Any, Dict, List, Literal, Mapping, Optional

import pytest
from langchain.agents import AgentType, initialize_agent
@@ -30,11 +30,15 @@
from langchain_core.language_models.llms import LLM
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables.base import RunnableLambda
from langchain_core.tools import StructuredTool
from langchain_core.tools import StructuredTool, tool
from langchain_openai import AzureChatOpenAI, ChatOpenAI, OpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import END, START, MessagesState, StateGraph
from langgraph.prebuilt import ToolNode
from pydantic.v1 import BaseModel, Field

from langfuse.callback import CallbackHandler
from langfuse.callback.langchain import LANGSMITH_TAG_HIDDEN
from langfuse.client import Langfuse
from tests.api_wrapper import LangfuseAPI
from tests.utils import create_uuid, encode_file_to_base64, get_api
@@ -2223,3 +2227,92 @@ def test_multimodal():
"@@@langfuseMedia:type=image/jpeg|id="
in trace.observations[0].input[0]["content"][1]["image_url"]["url"]
)


def test_langgraph():
    # Define the tools for the agent to use
    @tool
    def search(query: str):
        """Call to surf the web."""
        # This is a placeholder, but don't tell the LLM that...
        if "sf" in query.lower() or "san francisco" in query.lower():
            return "It's 60 degrees and foggy."
        return "It's 90 degrees and sunny."

    tools = [search]
    tool_node = ToolNode(tools)
    model = ChatOpenAI(model="gpt-4o-mini").bind_tools(tools)

    # Define the function that determines whether to continue or not
    def should_continue(state: MessagesState) -> Literal["tools", END]:
        messages = state["messages"]
        last_message = messages[-1]
        # If the LLM makes a tool call, then we route to the "tools" node
        if last_message.tool_calls:
            return "tools"
        # Otherwise, we stop (reply to the user)
        return END

    # Define the function that calls the model
    def call_model(state: MessagesState):
        messages = state["messages"]
        response = model.invoke(messages)
        # We return a list, because this will get added to the existing list
        return {"messages": [response]}

    # Define a new graph
    workflow = StateGraph(MessagesState)

    # Define the two nodes we will cycle between
    workflow.add_node("agent", call_model)
    workflow.add_node("tools", tool_node)

    # Set the entrypoint as `agent`
    # This means that this node is the first one called
    workflow.add_edge(START, "agent")

    # We now add a conditional edge
    workflow.add_conditional_edges(
        # First, we define the start node. We use `agent`.
        # This means these are the edges taken after the `agent` node is called.
        "agent",
        # Next, we pass in the function that will determine which node is called next.
        should_continue,
    )

    # We now add a normal edge from `tools` to `agent`.
    # This means that after `tools` is called, `agent` node is called next.
    workflow.add_edge("tools", "agent")

    # Initialize memory to persist state between graph runs
    checkpointer = MemorySaver()

    # Finally, we compile it!
    # This compiles it into a LangChain Runnable,
    # meaning you can use it as you would any other runnable.
    # Note that we're (optionally) passing the memory when compiling the graph
    app = workflow.compile(checkpointer=checkpointer)

    handler = CallbackHandler()

    # Use the Runnable
    final_state = app.invoke(
        {"messages": [HumanMessage(content="what is the weather in sf")]},
        config={"configurable": {"thread_id": 42}, "callbacks": [handler]},
    )
    print(final_state["messages"][-1].content)
    handler.flush()

    trace = get_api().trace.get(handler.get_trace_id())

    hidden_count = 0

    for observation in trace.observations:
        if LANGSMITH_TAG_HIDDEN in observation.metadata.get("tags", []):
            hidden_count += 1
            assert observation.level == "DEBUG"
        else:
            assert observation.level == "DEFAULT"

    assert hidden_count > 0
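The final assertion relies on langgraph tagging its internal bookkeeping runs with langsmith:hidden, so the handler demotes them to DEBUG without any user action. A hedged follow-up sketch, not part of this commit, of filtering those observations out client-side:

# Continuing from the test above: `trace` was fetched via the public API.
# Keep only the observations meant to be user-visible.
visible = [o for o in trace.observations if o.level != "DEBUG"]
for o in visible:
    print(o.name, o.level)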
