learning-commons-org · czi-fsisenda · May 20, 2026 · May 20, 2026 · May 20, 2026 · May 21, 2026
diff --git a/sdks/python/README.md b/sdks/python/README.md
@@ -230,6 +230,31 @@ config = create_config(
 )
 ```
 
+### Provider config validation
+
+When you construct an evaluator (or call `evaluate` / `evaluate_sync`), the SDK checks
+that every `LLMProvider` referenced by a `PromptSettings` field on the active
+`EvaluationSettings` has a matching provider config on `EvaluatorConfig`. For example,
+the vocabulary evaluator’s defaults use both Google and OpenAI, so both
+`google_llm_provider_config` and `openai_llm_provider_config` must be set; conventionality
+only needs Google.
+
+Validation runs:
+
+- In `BaseEvaluator.__init__` against the resolved default evaluation settings (constructor
+  override or the subclass class attribute).
+- At the start of each `evaluate()` call against the settings used for that run (including
+  per-call `evaluation_settings` overrides).
+
+If a required provider is missing, construction or evaluation raises `ConfigurationError`
+with the same per-provider message used at LLM call time (for example,
+`Google provider config is not set on EvaluatorConfig`); when several providers are missing,
+their messages are joined with `; ` into a single error. You can also call
+`config.validate_supports_evaluation_settings(settings)` directly before constructing an evaluator.
+
+Only providers actually used in the settings object are required — you do not need to
+configure every provider on every evaluator.
+
 ### Per-instance default evaluation settings
 
 Every `BaseEvaluator` subclass defines **class-level** `default_evaluation_settings`
@@ -261,6 +286,11 @@ result = evaluator.evaluate_sync(input)
 result = evaluator.evaluate_sync(input, evaluation_settings=other_settings)
 ```
 
+If `other_settings` references a provider that is not on `config`, `evaluate_sync` raises
+`ConfigurationError` before any LLM call. The same applies when you pass
+`default_evaluation_settings` at construction: every provider in those settings must be
+configured on `config`.
+
 If you omit `default_evaluation_settings` at construction, attribute lookup uses the
 subclass class attribute, same as before. Whenever you call `evaluate_sync()` or
 `await evaluator.evaluate(...)` without `evaluation_settings`, the SDK uses
@@ -437,6 +467,10 @@ class MyEvaluator(BaseEvaluator[MyInput, EvaluationResult, MySettings]):
 
 If you override `__init__` on the subclass, accept the same keyword-only argument and forward it: `super().__init__(config, default_evaluation_settings=default_evaluation_settings)`.
 
+Declare each prompt step as a direct `PromptSettings` field on your settings model (typically named
+`prompt_settings_*`). The base class inspects only those direct fields to determine which provider configs
+must be present on `EvaluatorConfig`; a `PromptSettings` nested inside a list, dict, or sub-model is not detected.
+
 ## License
 
 MIT
diff --git a/sdks/python/src/learning_commons_evaluators/evaluators/base.py b/sdks/python/src/learning_commons_evaluators/evaluators/base.py
@@ -64,6 +64,10 @@ class BaseEvaluator(ABC, Generic[InputT, OutputT, SettingsT]):
     Pass ``default_evaluation_settings`` at construction to override the class-level
     defaults for that instance (used when :meth:`evaluate` is called without
     ``evaluation_settings``).
+
+    Raises:
+        ConfigurationError: Default evaluation settings require an LLM provider
+            that is not configured on ``config``.
     """
 
     config: EvaluatorConfig
@@ -79,7 +83,12 @@ def __init__(
         self.config = config
         if default_evaluation_settings is not None:
             self.default_evaluation_settings = default_evaluation_settings
-        # TODO: validate config
+        settings_for_validation = (
+            default_evaluation_settings
+            if default_evaluation_settings is not None
+            else self.__class__.default_evaluation_settings
+        )
+        config.validate_supports_evaluation_settings(settings_for_validation)
 
     async def evaluate(
         self,
@@ -130,6 +139,7 @@ async def evaluate(
             extra={"evaluation_metadata": evaluation_metadata},
         )
         try:
+            self.config.validate_supports_evaluation_settings(evaluation_settings)
             input.validate()
             result = await self.evaluate_impl(input, evaluation_settings, evaluation_metadata)
             evaluation_metadata.status = Status.succeeded

diff --git a/sdks/python/src/learning_commons_evaluators/schemas/config.py b/sdks/python/src/learning_commons_evaluators/schemas/config.py
@@ -12,6 +12,7 @@
 from pydantic import BaseModel, ConfigDict
 
 from learning_commons_evaluators.logger import Logger, get_logger
+from learning_commons_evaluators.schemas.errors import ConfigurationError
 
 # --- LLM provider configs (for LLM calls in prompt steps) ---
 
@@ -75,6 +76,29 @@ class EvaluationSettings(BaseModel):
     model_config = ConfigDict(arbitrary_types_allowed=True)
 
 
+_PROVIDER_CONFIG_ATTR: dict[LLMProvider, str] = {
+    LLMProvider.GOOGLE: "google_llm_provider_config",
+    LLMProvider.OPENAI: "openai_llm_provider_config",
+    LLMProvider.ANTHROPIC: "anthropic_llm_provider_config",
+}
+
+_PROVIDER_MISSING_MESSAGE: dict[LLMProvider, str] = {
+    LLMProvider.GOOGLE: "Google provider config is not set on EvaluatorConfig",
+    LLMProvider.OPENAI: "OpenAI provider config is not set on EvaluatorConfig",
+    LLMProvider.ANTHROPIC: "Anthropic provider config is not set on EvaluatorConfig",
+}
+
+
+def _required_llm_providers(settings: EvaluationSettings) -> set[LLMProvider]:
+    """Return provider types of the declared fields on settings that hold a PromptSettings."""
+    field_values = (getattr(settings, field_name) for field_name in type(settings).model_fields)
+    return {
+        candidate.provider_type
+        for candidate in field_values
+        if isinstance(candidate, PromptSettings)
+    }
+
+
 @dataclass(frozen=True)
 class TelemetryConfig:
     """Config for telemetry."""
@@ -106,6 +130,17 @@ class EvaluatorConfig:
     logger: Logger = field(default_factory=get_logger)
     telemetry: TelemetryConfig = field(default_factory=TelemetryConfig)
 
+    def validate_supports_evaluation_settings(self, settings: EvaluationSettings) -> None:
+        """Raise ConfigurationError naming every LLM provider settings need but self lacks."""
+        unconfigured = (
+            needed
+            for needed in sorted(_required_llm_providers(settings), key=lambda item: item.value)
+            if getattr(self, _PROVIDER_CONFIG_ATTR[needed]) is None
+        )
+        problems = "; ".join(_PROVIDER_MISSING_MESSAGE[needed] for needed in unconfigured)
+        if problems:
+            raise ConfigurationError(problems)
+
 
 def create_config(
     *,

diff --git a/sdks/python/tests/conftest.py b/sdks/python/tests/conftest.py
@@ -3,7 +3,11 @@
 import pytest
 
 from learning_commons_evaluators import create_config_no_telemetry
-from learning_commons_evaluators.schemas.config import EvaluationSettings
+from learning_commons_evaluators.schemas.config import (
+    EvaluationSettings,
+    GoogleLLMProviderConfig,
+    OpenAILLMProviderConfig,
+)
 from learning_commons_evaluators.schemas.metadata import (
     EvaluationMetadata,
     EvaluatorMaturity,
@@ -42,3 +46,20 @@ def evaluation_metadata(evaluator_metadata):
 def config():
     """EvaluatorConfig with no telemetry, suitable for unit tests."""
     return create_config_no_telemetry()
+
+
+@pytest.fixture
+def config_with_google():
+    """No-telemetry EvaluatorConfig with a Google test key (conventionality and similar)."""
+    return create_config_no_telemetry(
+        google_llm_provider_config=GoogleLLMProviderConfig(api_key="test-google-key"),
+    )
+
+
+@pytest.fixture
+def config_with_google_and_openai():
+    """No-telemetry EvaluatorConfig with Google and OpenAI test keys (vocabulary evaluator)."""
+    return create_config_no_telemetry(
+        google_llm_provider_config=GoogleLLMProviderConfig(api_key="test-google-key"),
+        openai_llm_provider_config=OpenAILLMProviderConfig(api_key="test-openai-key"),
+    )
diff --git a/sdks/python/tests/contract_tests/test_conventionality.py b/sdks/python/tests/contract_tests/test_conventionality.py
@@ -23,11 +23,7 @@
 copy.
 """
 
-from learning_commons_evaluators import (
-    ConventionalityEvaluationInput,
-    ConventionalityEvaluator,
-    create_config_no_telemetry,
-)
+from learning_commons_evaluators import ConventionalityEvaluationInput, ConventionalityEvaluator
 from learning_commons_evaluators.schemas.metadata import Status
 
 from .conventionality import (
@@ -38,7 +34,7 @@
 
 
 class TestConventionalityContract:
-    def test_turnip_grade4(self) -> None:
+    def test_turnip_grade4(self, config_with_google) -> None:
         """Turnip classroom narrative, grade 4.
 
         Verifies:
@@ -49,8 +45,7 @@ def test_turnip_grade4(self) -> None:
         """
         case = load_conventionality_turnip_case()
 
-        config = create_config_no_telemetry()
-        evaluator = ConventionalityEvaluator(config)
+        evaluator = ConventionalityEvaluator(config_with_google)
         inp = ConventionalityEvaluationInput(
             text=case.input["text"],
             grade=case.input["grade"],

diff --git a/sdks/python/tests/contract_tests/test_vocabulary.py b/sdks/python/tests/contract_tests/test_vocabulary.py
@@ -32,11 +32,7 @@
 the ``user_prompt`` and ``llm_response`` fields need to be populated.
 """
 
-from learning_commons_evaluators import (
-    VocabularyEvaluationInput,
-    VocabularyEvaluator,
-    create_config_no_telemetry,
-)
+from learning_commons_evaluators import VocabularyEvaluationInput, VocabularyEvaluator
 from learning_commons_evaluators.schemas.metadata import Status
 
 from .harness import ContractTestHarness
@@ -49,7 +45,7 @@
 
 
 class TestVocabularyContractGrades34:
-    def test_marco_polo_grade3(self) -> None:
+    def test_marco_polo_grade3(self, config_with_google_and_openai) -> None:
         """Marco Polo passage, grade 3 — grades 3–4 Gemini path.
 
         Verifies:
@@ -60,8 +56,7 @@ def test_marco_polo_grade3(self) -> None:
         """
         case = load_vocabulary_grade34_case()
 
-        config = create_config_no_telemetry()
-        evaluator = VocabularyEvaluator(config)
+        evaluator = VocabularyEvaluator(config_with_google_and_openai)
         inp = VocabularyEvaluationInput(
             text=case.input["text"],
             grade=case.input["grade"],
@@ -94,7 +89,7 @@ def test_marco_polo_grade3(self) -> None:
 
 
 class TestVocabularyContractOtherGrades:
-    def test_hurricanes_grade7(self) -> None:
+    def test_hurricanes_grade7(self, config_with_google_and_openai) -> None:
         """Hurricane formation passage, grade 7 — grades 5–12 GPT path.
 
         Verifies:
@@ -105,8 +100,7 @@ def test_hurricanes_grade7(self) -> None:
         """
         case = load_vocabulary_other_grades_case()
 
-        config = create_config_no_telemetry()
-        evaluator = VocabularyEvaluator(config)
+        evaluator = VocabularyEvaluator(config_with_google_and_openai)
         inp = VocabularyEvaluationInput(
             text=case.input["text"],
             grade=case.input["grade"],