truera · sfc-gh-dkurokawa · Jan 17, 2025 · Jan 19, 2025 · Jan 21, 2025 · Jan 21, 2025
diff --git a/Makefile b/Makefile
@@ -41,7 +41,11 @@ env-tests:
 		ruff \
 
 env-tests-required:
-	poetry install --only required \
+	pip install zipp==3.20.1 \
+		&& echo "INSTALLED zipp" \
+		&& pip install alembic==1.14.0 \
+		&& echo "INSTALLED alembic" \
+		&& poetry install --only required \
 		&& make env-tests
 
 env-tests-optional: env env-tests

diff --git a/poetry.lock b/poetry.lock
diff --git a/src/apps/llamaindex/trulens/apps/llamaindex/tru_llama.py b/src/apps/llamaindex/trulens/apps/llamaindex/tru_llama.py
@@ -9,6 +9,7 @@
     AsyncGenerator,
     Callable,
     ClassVar,
+    Dict,
     Generator,
     List,
     Optional,
@@ -32,6 +33,8 @@
 from trulens.core.utils import pyschema as pyschema_utils
 from trulens.core.utils import python as python_utils
 from trulens.core.utils import serial as serial_utils
+from trulens.experimental.otel_tracing.core.span import Attributes
+from trulens.otel.semconv.trace import SpanAttributes
 
 T = TypeVar("T")
 
@@ -72,6 +75,7 @@
     from llama_index.core.response_synthesizers import Refine
     from llama_index.core.retrievers import BaseRetriever
     from llama_index.core.schema import BaseComponent
+    from llama_index.core.schema import NodeWithScore
     from llama_index.core.schema import QueryBundle
     from llama_index.core.service_context_elements.llm_predictor import (
         BaseLLMPredictor,
@@ -133,6 +137,41 @@
 pp = PrettyPrinter()
 
 
+def _retrieval_span() -> Dict[str, Union[SpanAttributes.SpanType, Attributes]]:
+    """Build the span keyword arguments shared by the retrieval methods.
+
+    The result is meant to be unpacked with `**` into `InstrumentedMethod`.
+
+    Returns:
+        A dict with two entries: `"span_type"`, always
+        `SpanAttributes.SpanType.RETRIEVAL`, and
+        `"full_scoped_span_attributes"`, a callable that derives the span
+        attributes from a single call's return value and keyword arguments.
+    """
+
+    def _full_scoped_span_attributes(
+        ret, exception, *args, **kwargs
+    ) -> Attributes:
+        """Guess the retrieval attributes for one instrumented call.
+
+        Args:
+            ret: Value returned by the instrumented method.
+            exception: Part of the callback signature; not inspected here.
+            *args: Positional arguments of the call; not inspected here.
+            **kwargs: Keyword arguments of the call, searched for the query.
+
+        Returns:
+            A dict that always holds
+            `SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS` and additionally
+            `SpanAttributes.RETRIEVAL.QUERY_TEXT` when exactly one candidate
+            query was found.
+        """
+        # Guess query text: every keyword value that is a plain string or a
+        # `QueryBundle` counts as a candidate. The keyword names are
+        # irrelevant, so only the values are iterated.
+        possible_query_texts = []
+        for v in kwargs.values():
+            if isinstance(v, str):
+                possible_query_texts.append(v)
+            elif isinstance(v, QueryBundle):
+                possible_query_texts.append(v.query_str)
+        # Guess retrieved contexts: a homogeneous list is reduced to its
+        # textual content; any other return value is recorded unchanged.
+        retrieved_context = ret
+        if isinstance(ret, list):
+            if all(isinstance(curr, NodeWithScore) for curr in ret):
+                retrieved_context = [curr.get_content() for curr in ret]
+            elif all(hasattr(curr, "text") for curr in ret):
+                retrieved_context = [curr.text for curr in ret]
+        # Return.
+        attributes = {
+            SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS: retrieved_context
+        }
+        # Only record the query when the guess is unambiguous; with zero or
+        # several candidates there is no way to tell which one is the query.
+        if len(possible_query_texts) == 1:
+            attributes[SpanAttributes.RETRIEVAL.QUERY_TEXT] = (
+                possible_query_texts[0]
+            )
+        return attributes
+
+    return {
+        "span_type": SpanAttributes.SpanType.RETRIEVAL,
+        "full_scoped_span_attributes": _full_scoped_span_attributes,
+    }
+
+
+
+
 class LlamaInstrument(core_instruments.Instrument):
     """Instrumentation for LlamaIndex apps."""
 
@@ -206,15 +245,51 @@ class Default:
                 InstrumentedMethod("acomplete", BaseChatEngine),
                 InstrumentedMethod("stream_complete", BaseChatEngine),
                 InstrumentedMethod("astream_complete", BaseChatEngine),
-                InstrumentedMethod("retrieve", BaseQueryEngine),
-                InstrumentedMethod("_retrieve", BaseQueryEngine),
-                InstrumentedMethod("_aretrieve", BaseQueryEngine),
-                InstrumentedMethod("retrieve", BaseRetriever),
-                InstrumentedMethod("_retrieve", BaseRetriever),
-                InstrumentedMethod("_aretrieve", BaseRetriever),
-                InstrumentedMethod("retrieve", WithFeedbackFilterNodes),
-                InstrumentedMethod("_retrieve", WithFeedbackFilterNodes),
-                InstrumentedMethod("_aretrieve", WithFeedbackFilterNodes),
+                InstrumentedMethod(
+                    "retrieve",
+                    BaseQueryEngine,
+                    **_retrieval_span(),
+                ),
+                InstrumentedMethod(
+                    "_retrieve",
+                    BaseQueryEngine,
+                    **_retrieval_span(),
+                ),
+                InstrumentedMethod(
+                    "_aretrieve",
+                    BaseQueryEngine,
+                    **_retrieval_span(),
+                ),
+                InstrumentedMethod(
+                    "retrieve",
+                    BaseRetriever,
+                    **_retrieval_span(),
+                ),
+                InstrumentedMethod(
+                    "_retrieve",
+                    BaseRetriever,
+                    **_retrieval_span(),
+                ),
+                InstrumentedMethod(
+                    "_aretrieve",
+                    BaseRetriever,
+                    **_retrieval_span(),
+                ),
+                InstrumentedMethod(
+                    "retrieve",
+                    WithFeedbackFilterNodes,
+                    **_retrieval_span(),
+                ),
+                InstrumentedMethod(
+                    "_retrieve",
+                    WithFeedbackFilterNodes,
+                    **_retrieval_span(),
+                ),
+                InstrumentedMethod(
+                    "_aretrieve",
+                    WithFeedbackFilterNodes,
+                    **_retrieval_span(),
+                ),
                 InstrumentedMethod("_postprocess_nodes", BaseNodePostprocessor),
                 InstrumentedMethod("_run_component", QueryEngineComponent),
                 InstrumentedMethod("_run_component", RetrieverComponent),

diff --git a/src/core/pyproject.toml b/src/core/pyproject.toml
@@ -41,7 +41,7 @@ rich = "^13.6"
 requests = "^2.31"
 packaging = ">=23.0"
 sqlalchemy = "^2.0"
-alembic = "^1.8.1"
+alembic = "^1.8.1,<1.14.1"
 nest-asyncio = "^1.5"
 python-dotenv = ">=0.21,<2.0"
 importlib-resources = "^6.0"

diff --git a/src/core/trulens/_mods.py b/src/core/trulens/_mods.py
@@ -77,8 +77,6 @@
 from trulens.dashboard.utils import records_utils
 from trulens.dashboard.ux import components as dashboard_components
 from trulens.dashboard.ux import styles as dashboard_styles
-from trulens.experimental.otel_tracing.core import otel as core_otel
-from trulens.experimental.otel_tracing.core import trace as core_trace
 from trulens.experimental.otel_tracing.core._utils import wrap as wrap_utils
 from trulens.feedback import embeddings as feedback_embeddings
 from trulens.feedback import feedback as mod_feedback

diff --git a/src/core/trulens/core/app.py b/src/core/trulens/core/app.py
@@ -764,25 +764,6 @@ def main_output(
     ) -> str:
         return signature_utils.main_output(func, ret)
 
-    # Experimental OTEL WithInstrumentCallbacks requirement
-    def _on_new_recording_span(
-        self,
-        recording_span: Any,  # Any = mod_trace.Span,
-    ):
-        from trulens.experimental.otel_tracing.core.app import _App
-
-        return _App._on_new_recording_span(self, recording_span)
-
-    # Experimental OTEL WithInstrumentCallbacks requirement
-    def _on_new_root_span(
-        self,
-        recording: core_instruments._RecordingContext,
-        root_span: Any,  # Any = mod_trace.Span,
-    ) -> record_schema.Record:
-        from trulens.experimental.otel_tracing.core.app import _App
-
-        return _App._on_new_root_span(self, recording, root_span)
-
     # WithInstrumentCallbacks requirement
     def on_method_instrumented(
         self, obj: object, func: Callable, path: serial_utils.Lens

diff --git a/src/core/trulens/core/feedback/endpoint.py b/src/core/trulens/core/feedback/endpoint.py
@@ -26,10 +26,7 @@
 
 import pydantic
 from pydantic import Field
-from pydantic import PrivateAttr
 import requests
-from trulens.core import experimental as core_experimental
-from trulens.core import session as core_session
 from trulens.core.schema import base as base_schema
 from trulens.core.utils import asynchro as asynchro_utils
 from trulens.core.utils import pace as pace_utils
@@ -223,11 +220,6 @@ class EndpointSetup:
     callback_name: str = Field(exclude=True)
     """Name of variable that stores the callback noted above."""
 
-    _experimental_wrapper_callback_class: Optional[Type[Any]] = PrivateAttr(
-        None
-    )  # Any actually WrapperEndpointCallback but cannot import it here
-    """EXPERIMENTAL(otel_tracing): callback class to use for usage tracking."""
-
     _context_endpoints: ClassVar[contextvars.ContextVar] = (
         contextvars.ContextVar("endpoints", default={})
     )
@@ -575,17 +567,6 @@ def track_all_costs_tally(
                 change after this method returns in case of Awaitable results.
         """
 
-        session = core_session.TruSession()
-
-        if session.experimental_feature(
-            core_experimental.Feature.OTEL_TRACING, freeze=True
-        ):
-            from trulens.experimental.otel_tracing.core.feedback.endpoint import (
-                _Endpoint,
-            )
-
-            return _Endpoint.track_all_costs_tally(__func, *args, **kwargs)
-
         result, cbs = Endpoint.track_all_costs(
             __func,
             *args,
@@ -746,17 +727,6 @@ def _have_context() -> bool:
     def wrap_function(self, func):
         """Create a wrapper of the given function to perform cost tracking."""
 
-        session = core_session.TruSession()
-
-        if session.experimental_feature(
-            core_experimental.Feature.OTEL_TRACING, freeze=True
-        ):
-            from trulens.experimental.otel_tracing.core.feedback.endpoint import (
-                _Endpoint,
-            )
-
-            return _Endpoint.wrap_function(self, func)
-
         if python_utils.safe_hasattr(func, INSTRUMENT):
             # Store the types of callback classes that will handle calls to the
             # wrapped function in the INSTRUMENT attribute. This will be used to

diff --git a/src/core/trulens/core/feedback/feedback.py b/src/core/trulens/core/feedback/feedback.py
@@ -733,11 +733,6 @@ def check_selectors(
         check_good: bool = True
 
         for k, q in self.selectors.items():
-            if select_schema.Select.RecordSpans.is_prefix_of(q):
-                # Skip checking for RecordSpans as they are not known ahead of
-                # producing a record.
-                continue
-
             if q.exists(source_data):
                 continue
 

diff --git a/src/core/trulens/core/schema/base.py b/src/core/trulens/core/schema/base.py
@@ -6,7 +6,6 @@
 from typing import Optional
 
 import pydantic
-from trulens.core.utils import containers as container_utils
 from trulens.core.utils import serial as serial_utils
 
 MAX_DILL_SIZE: int = 1024 * 1024  # 1MB
@@ -162,36 +161,6 @@ def latency(self):
         """Latency in seconds."""
         return self.end_time - self.start_time
 
-    @staticmethod
-    def of_ns_timestamps(
-        start_ns_timestamp: int, end_ns_timestamp: Optional[int] = None
-    ) -> Perf:
-        """EXPERIMENTAL(otel_tracing): Create a `Perf` instance from start and
-        end times in nanoseconds since the epoch."""
-
-        return Perf(
-            start_time=container_utils.datetime_of_ns_timestamp(
-                start_ns_timestamp
-            ),
-            end_time=container_utils.datetime_of_ns_timestamp(end_ns_timestamp)
-            if end_ns_timestamp is not None
-            else datetime.datetime.max,
-        )
-
-    @property
-    def start_ns_timestamp(self) -> int:
-        """EXPERIMENTAL(otel_tracing): Start time in number of nanoseconds since
-        the epoch."""
-
-        return container_utils.ns_timestamp_of_datetime(self.start_time)
-
-    @property
-    def end_ns_timestamp(self) -> int:
-        """EXPERIMENTAL(otel_tracing): End time in number of nanoseconds since
-        the epoch."""
-
-        return container_utils.ns_timestamp_of_datetime(self.end_time)
-
 
 # HACK013: Need these if using __future__.annotations .
 Cost.model_rebuild()