Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions sdks/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ classifiers = [
]
dependencies = [
# LLM access is via LangChain; the langchain-* packages pull in provider SDKs as needed.
"httpx>=0.27.0",
"pydantic>=2.0.0",
"textstat>=0.7.0",
"langchain-anthropic>=0.2.0",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
TokenUsage,
prompt_settings_to_extras_value,
)
from learning_commons_evaluators.telemetry import schedule_send_telemetry

InputT = TypeVar("InputT", bound=EvaluationInput)
OutputT = TypeVar("OutputT", bound=EvaluationResult)
Expand Down Expand Up @@ -145,8 +146,7 @@ async def evaluate(
"evaluation end",
extra={"evaluation_metadata": evaluation_metadata},
)
# TODO: add full input to telemetry if enabled
# TODO: send_telemetry(evaluation_metadata)
schedule_send_telemetry(evaluation_metadata, input, self.config)

def evaluate_sync(
self,
Expand Down
12 changes: 12 additions & 0 deletions sdks/python/src/learning_commons_evaluators/schemas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,13 @@
from learning_commons_evaluators.schemas.text_complexity import (
TextComplexityEvaluationInput,
)
from learning_commons_evaluators.schemas.ts_telemetry import (
EvaluationTelemetryStatus,
TelemetryEvent,
TelemetryMetadataPayload,
TelemetryStageDetail,
TelemetryTokenUsage,
)

__all__ = [
"AnyInputSpec",
Expand All @@ -56,6 +63,7 @@
"EvaluationMetadata",
"EvaluationResult",
"EvaluationSettings",
"EvaluationTelemetryStatus",
"EvaluatorMetadata",
"EvaluatorMaturity",
"GradeInputField",
Expand All @@ -67,6 +75,10 @@
"PROMPT_STEP_EXTRA_TOKEN_USAGE",
"Status",
"StepMetadata",
"TelemetryEvent",
"TelemetryMetadataPayload",
"TelemetryStageDetail",
"TelemetryTokenUsage",
"TextComplexityEvaluationInput",
"TextInputField",
"TokenUsage",
Expand Down
87 changes: 78 additions & 9 deletions sdks/python/src/learning_commons_evaluators/schemas/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,19 @@
create_config_no_telemetry, create_config_telemetry_with_full_input).
"""

import uuid
from dataclasses import dataclass, field
from enum import Enum

from pydantic import BaseModel, ConfigDict

from learning_commons_evaluators.logger import Logger, get_logger

# Default URL for telemetry events; used as the default value of ``TelemetryConfig.endpoint``.
DEFAULT_TELEMETRY_EVENTS_ENDPOINT = "https://api.learningcommons.org/evaluators-telemetry/v1/events"

# Shared per process so multiple :class:`EvaluatorConfig` instances derive the same client id.
# Generated once when this module is imported; it is the default for
# ``EvaluatorConfig.client_id_seed``.
_PROCESS_CLIENT_ID_SEED = uuid.uuid4()

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P0 - I would strongly recommend making telemetry anonymous by default in the config, i.e., callers should not need to call create_config_anonymous_telemetry at all; plain create_config should be the happy-path default and the primary documented case.

# Anonymous (default)
config = create_config(google_llm_provider_config=...)

# Tracked
config = create_config(google_llm_provider_config=..., telemetry_partner_id=LC_KEY)

# Off
config = create_config_no_telemetry(google_llm_provider_config=...)

We can then cache _ANONYMOUS_CLIENT_ID at module level so that it is shared across create_config calls and evaluations.

# --- LLM provider configs (for LLM calls in prompt steps) ---


Expand Down Expand Up @@ -79,6 +85,7 @@ class EvaluationSettings(BaseModel):
class TelemetryConfig:
"""Config for telemetry."""

endpoint: str = DEFAULT_TELEMETRY_EVENTS_ENDPOINT
telemetry_partner_id: str | None = None
send_full_input_with_telemetry: bool = False

Expand Down Expand Up @@ -106,6 +113,11 @@ class EvaluatorConfig:
logger: Logger = field(default_factory=get_logger)
telemetry: TelemetryConfig = field(default_factory=TelemetryConfig)

# Temporary until we finalize the telemetry API key/client id strategy.
#: UUID v5 namespace for deriving ``X-Client-ID`` when ``telemetry_partner_id`` is an API key.
#: Defaults to a single per-process seed so all configs in one run share the same derived id.
client_id_seed: uuid.UUID = field(default=_PROCESS_CLIENT_ID_SEED)


def create_config(
*,
Expand All @@ -129,38 +141,95 @@ def create_config(
)


def create_config_no_telemetry(
def create_config_telemetry_with_full_input(
*,
google_llm_provider_config: GoogleLLMProviderConfig | None = None,
openai_llm_provider_config: OpenAILLMProviderConfig | None = None,
anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None,
logger: Logger | None = None,
telemetry_partner_id: str,
) -> EvaluatorConfig:
"""Create evaluator config with telemetry disabled."""
"""Create evaluator config with telemetry and full input sent with telemetry."""
return EvaluatorConfig(
google_llm_provider_config=google_llm_provider_config,
openai_llm_provider_config=openai_llm_provider_config,
anthropic_llm_provider_config=anthropic_llm_provider_config,
logger=get_logger() if logger is None else logger,
telemetry=TelemetryConfig(telemetry_partner_id=None, send_full_input_with_telemetry=False),
telemetry=TelemetryConfig(
telemetry_partner_id=telemetry_partner_id, send_full_input_with_telemetry=True
),
)


def create_config_telemetry_with_full_input(
def create_config_anonymous_telemetry(
*,
google_llm_provider_config: GoogleLLMProviderConfig | None = None,
openai_llm_provider_config: OpenAILLMProviderConfig | None = None,
anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None,
logger: Logger | None = None,
telemetry_partner_id: str,
send_full_input_with_telemetry: bool = False,
) -> EvaluatorConfig:
"""Create evaluator config with telemetry and full input sent with telemetry."""
"""Create evaluator config with anonymous telemetry."""
anonymous_telemetry_id = str(uuid.uuid4())
return create_config_with_telemetry_config(
google_llm_provider_config=google_llm_provider_config,
openai_llm_provider_config=openai_llm_provider_config,
anthropic_llm_provider_config=anthropic_llm_provider_config,
logger=logger,
telemetry_config=TelemetryConfig(
telemetry_partner_id=anonymous_telemetry_id,
send_full_input_with_telemetry=send_full_input_with_telemetry,
),
)


def create_config_anonymous_telemetry_with_full_input(
    *,
    google_llm_provider_config: GoogleLLMProviderConfig | None = None,
    openai_llm_provider_config: OpenAILLMProviderConfig | None = None,
    anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None,
    logger: Logger | None = None,
) -> EvaluatorConfig:
    """Build an anonymous-telemetry config that also sends the full evaluation input.

    Thin wrapper over :func:`create_config_anonymous_telemetry`: every argument is
    forwarded unchanged and ``send_full_input_with_telemetry`` is fixed to ``True``.

    Args:
        google_llm_provider_config: Optional Google provider settings, passed through.
        openai_llm_provider_config: Optional OpenAI provider settings, passed through.
        anthropic_llm_provider_config: Optional Anthropic provider settings, passed through.
        logger: Optional logger, passed through as-is (``None`` included).

    Returns:
        The ``EvaluatorConfig`` produced by :func:`create_config_anonymous_telemetry`.
    """
    config = create_config_anonymous_telemetry(
        send_full_input_with_telemetry=True,
        logger=logger,
        anthropic_llm_provider_config=anthropic_llm_provider_config,
        openai_llm_provider_config=openai_llm_provider_config,
        google_llm_provider_config=google_llm_provider_config,
    )
    return config


def create_config_with_telemetry_config(
*,
google_llm_provider_config: GoogleLLMProviderConfig | None = None,
openai_llm_provider_config: OpenAILLMProviderConfig | None = None,
anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None,
logger: Logger | None = None,
telemetry_config: TelemetryConfig,
) -> EvaluatorConfig:
"""Create evaluator config with telemetry. telemetry_config is required."""
return EvaluatorConfig(
google_llm_provider_config=google_llm_provider_config,
openai_llm_provider_config=openai_llm_provider_config,
anthropic_llm_provider_config=anthropic_llm_provider_config,
logger=get_logger() if logger is None else logger,
telemetry=TelemetryConfig(
telemetry_partner_id=telemetry_partner_id, send_full_input_with_telemetry=True
),
telemetry=telemetry_config,
)


def create_config_no_telemetry(
    *,
    google_llm_provider_config: GoogleLLMProviderConfig | None = None,
    openai_llm_provider_config: OpenAILLMProviderConfig | None = None,
    anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None,
    logger: Logger | None = None,
) -> EvaluatorConfig:
    """Build an evaluator config with telemetry disabled.

    The returned config carries a ``TelemetryConfig`` with no partner id and
    ``send_full_input_with_telemetry=False``.

    Args:
        google_llm_provider_config: Optional Google provider settings.
        openai_llm_provider_config: Optional OpenAI provider settings.
        anthropic_llm_provider_config: Optional Anthropic provider settings.
        logger: Logger to attach; when ``None`` the one from ``get_logger()`` is used.

    Returns:
        A new ``EvaluatorConfig``.
    """
    if logger is None:
        logger = get_logger()
    telemetry_off = TelemetryConfig(
        telemetry_partner_id=None,
        send_full_input_with_telemetry=False,
    )
    return EvaluatorConfig(
        telemetry=telemetry_off,
        logger=logger,
        anthropic_llm_provider_config=anthropic_llm_provider_config,
        openai_llm_provider_config=openai_llm_provider_config,
        google_llm_provider_config=google_llm_provider_config,
    )
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Wire types aligned with ``sdks/typescript/src/telemetry/types.ts``.

Hand-maintained; keep in sync with the TypeScript SDK until a shared schema exists.
"""

from __future__ import annotations

from typing import Literal

from pydantic import BaseModel, ConfigDict

__all__ = [
"EvaluationTelemetryStatus",
"TelemetryEvent",
"TelemetryMetadataPayload",
"TelemetryStageDetail",
"TelemetryTokenUsage",
]

# Mirrors TS ``EvaluationStatus``. This is the type of ``TelemetryEvent.status``, so only
# these two string values validate there.
EvaluationTelemetryStatus = Literal["success", "error"]


class TelemetryTokenUsage(BaseModel):
    """Mirrors TS ``TokenUsage``.

    Token counts; used as the ``token_usage`` field of ``TelemetryStageDetail``
    and ``TelemetryEvent``.
    """

    # extra="forbid": pydantic rejects any field that is not declared below.
    model_config = ConfigDict(extra="forbid")

    # Both counts are required integers.
    # NOTE(review): presumably prompt vs. completion tokens — confirm against the TS type.
    input_tokens: int
    output_tokens: int


class TelemetryStageDetail(BaseModel):
    """Mirrors TS ``StageDetail``.

    One entry of ``TelemetryMetadataPayload.stage_details``.
    """

    # extra="forbid": pydantic rejects any field that is not declared below.
    model_config = ConfigDict(extra="forbid")

    # Required fields.
    # NOTE(review): presumably the stage name and the LLM provider used for it — confirm
    # against the adapter that builds these.
    stage: str
    provider: str
    latency_ms: float
    # Optional fields; both default to None when not supplied.
    token_usage: TelemetryTokenUsage | None = None
    schema_validation_failed: bool | None = None


class TelemetryMetadataPayload(BaseModel):
    """Mirrors TS ``TelemetryMetadata``.

    Used as the optional ``metadata`` field of ``TelemetryEvent``.
    """

    # extra="forbid": pydantic rejects any field that is not declared below.
    model_config = ConfigDict(extra="forbid")

    # Optional list of per-stage records; defaults to None when not supplied.
    stage_details: list[TelemetryStageDetail] | None = None


class TelemetryEvent(BaseModel):
    """Mirrors TS ``TelemetryEvent`` (JSON field names match the TS interface).

    Top-level telemetry payload. Fields without a default are required; the
    remaining fields are optional and default to ``None``.
    """

    # extra="forbid": pydantic rejects any field that is not declared below.
    model_config = ConfigDict(extra="forbid")

    # A plain string, not a datetime.
    # NOTE(review): expected to be an ISO-8601 UTC timestamp — confirm against the sender.
    timestamp: str
    sdk_version: str
    evaluator_type: str
    grade: str | None = None
    # Restricted to the ``EvaluationTelemetryStatus`` literals ("success" / "error").
    status: EvaluationTelemetryStatus
    # NOTE(review): presumably only set when status is "error" — confirm in the adapter.
    error_code: str | None = None
    latency_ms: float
    text_length_chars: int
    provider: str
    token_usage: TelemetryTokenUsage | None = None
    metadata: TelemetryMetadataPayload | None = None
    # NOTE(review): presumably only populated when full-input telemetry is enabled —
    # confirm in the adapter.
    input_text: str | None = None
121 changes: 121 additions & 0 deletions sdks/python/src/learning_commons_evaluators/telemetry/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Telemetry: schedule and send evaluation events (fire-and-forget HTTP POST)."""

from __future__ import annotations

import asyncio
import threading
import uuid
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone

import httpx

from learning_commons_evaluators.schemas.config import EvaluatorConfig
from learning_commons_evaluators.schemas.evaluator import EvaluationInput
from learning_commons_evaluators.schemas.metadata import EvaluationMetadata
from learning_commons_evaluators.telemetry.adapter import evaluation_to_typescript_telemetry_event
from learning_commons_evaluators.telemetry.utils import client_id_from_seed, iso_utc_z

__all__ = [
"evaluation_to_typescript_telemetry_event",
"schedule_send_telemetry",
"send_telemetry",
"should_send_telemetry",
]

# Shared worker pool for telemetry sends. Starts as None and is created on the first call
# to ``_get_telemetry_executor``.
_TELEMETRY_EXECUTOR: ThreadPoolExecutor | None = None
# Held while ``_get_telemetry_executor`` checks for and creates the pool.
_TELEMETRY_EXECUTOR_LOCK = threading.Lock()


def _get_telemetry_executor() -> ThreadPoolExecutor:
    """Return the module-wide telemetry thread pool, creating it on first use.

    The check and the creation both happen while ``_TELEMETRY_EXECUTOR_LOCK`` is
    held. The pool is built with two workers whose thread names start with
    ``lc-telemetry`` and is stored in ``_TELEMETRY_EXECUTOR`` for later calls.
    """
    global _TELEMETRY_EXECUTOR
    with _TELEMETRY_EXECUTOR_LOCK:
        pool = _TELEMETRY_EXECUTOR
        if pool is None:
            pool = ThreadPoolExecutor(
                thread_name_prefix="lc-telemetry",
                max_workers=2,
            )
            _TELEMETRY_EXECUTOR = pool
    return pool


def should_send_telemetry(config: EvaluatorConfig) -> bool:
    """Tell whether telemetry is enabled for ``config``.

    Telemetry counts as enabled only when ``config.telemetry.telemetry_partner_id``
    is set and is still non-empty after surrounding whitespace is stripped.

    Args:
        config: Evaluator configuration whose ``telemetry`` settings are inspected.

    Returns:
        ``True`` when a usable partner / client id is configured, else ``False``.
    """
    candidate = config.telemetry.telemetry_partner_id
    if not candidate:
        # Covers both ``None`` and the empty string.
        return False
    return len(candidate.strip()) > 0


def _is_uuid(value: str | None) -> bool:
    """Report whether ``value`` can be parsed by :class:`uuid.UUID`.

    ``None`` is never a UUID. Any ``ValueError``, ``TypeError`` or
    ``AttributeError`` raised while parsing is treated as "not a UUID".
    """
    if value is None:
        return False
    try:
        uuid.UUID(value)
    except (ValueError, TypeError, AttributeError):
        parsed = False
    else:
        parsed = True
    return parsed


async def send_telemetry(
    evaluation_metadata: EvaluationMetadata,
    inp: EvaluationInput | None,
    config: EvaluatorConfig,
) -> None:
    """POST one TypeScript-shaped telemetry event as JSON; never raises to the caller.

    Returns immediately when :func:`should_send_telemetry` is false. Otherwise the
    event is built from the evaluation, stamped with the current UTC time, and
    posted to ``config.telemetry.endpoint`` with a 5 second timeout.

    Headers:
        * ``X-Client-ID`` is always sent. It is the stripped partner id when that id
          parses as a UUID, otherwise an id derived from the partner id and
          ``config.client_id_seed``.
        * ``X-API-Key`` is sent only when the partner id does not parse as a UUID,
          and carries the stripped partner id.

    Args:
        evaluation_metadata: Metadata of the finished evaluation.
        inp: The evaluation input, or ``None``.
        config: Evaluator configuration (telemetry settings, logger, client id seed).

    Any exception and any HTTP error status is logged as a warning through
    ``config.logger`` and otherwise swallowed.
    """
    if not should_send_telemetry(config):
        return

    try:
        raw_partner_id = config.telemetry.telemetry_partner_id
        # For mypy only: ``should_send_telemetry`` guarantees a non-empty id after strip.
        assert raw_partner_id is not None
        partner = raw_partner_id.strip()

        base_event = evaluation_to_typescript_telemetry_event(evaluation_metadata, inp, config)
        # TS SDK sets timestamp at send time (`new Date().toISOString()`), not evaluation start.
        sent_at = iso_utc_z(datetime.now(timezone.utc))
        stamped_event = base_event.model_copy(update={"timestamp": sent_at})
        body = stamped_event.model_dump(mode="json", exclude_none=True)

        request_headers: dict[str, str] = {"Content-Type": "application/json"}
        if _is_uuid(partner):
            # A UUID partner id is used directly as the client id; no API key header.
            request_headers["X-Client-ID"] = partner
        else:
            # Anything else is treated as an API key, with the client id derived from it.
            request_headers["X-Client-ID"] = client_id_from_seed(partner, config.client_id_seed)
            request_headers["X-API-Key"] = partner

        async with httpx.AsyncClient(timeout=httpx.Timeout(5.0)) as http:
            reply = await http.post(
                config.telemetry.endpoint,
                json=body,
                headers=request_headers,
            )
        if reply.is_error:
            # Log status only; response bodies may echo input text or other sensitive data.
            config.logger.warning(
                "telemetry send failed: HTTP %s",
                reply.status_code,
            )
    except Exception as exc:
        # Log exception type only; ``str(exc)`` may include payload fields (e.g. input_text).
        config.logger.warning(
            "telemetry send failed: %s",
            type(exc).__qualname__,
        )


def schedule_send_telemetry(
    evaluation_metadata: EvaluationMetadata,
    inp: EvaluationInput | None,
    config: EvaluatorConfig,
) -> None:
    """Queue a fire-and-forget telemetry send on the shared worker pool.

    Does nothing when :func:`should_send_telemetry` reports telemetry as disabled.
    Otherwise one job is submitted to the pool from ``_get_telemetry_executor``.
    The job runs :func:`send_telemetry` to completion with ``asyncio.run`` on the
    worker thread. The future returned by ``submit`` is discarded.

    Args:
        evaluation_metadata: Metadata of the finished evaluation.
        inp: The evaluation input, or ``None``.
        config: Evaluator configuration passed through to :func:`send_telemetry`.
    """
    if should_send_telemetry(config):
        _get_telemetry_executor().submit(
            lambda: asyncio.run(send_telemetry(evaluation_metadata, inp, config))
        )
Loading
Loading