diff --git a/sdks/python/README.md b/sdks/python/README.md index 9a1e80c..faf479a 100644 --- a/sdks/python/README.md +++ b/sdks/python/README.md @@ -8,7 +8,7 @@ Python SDK for Learning Commons educational text evaluators. - **`schemas/`** – Pydantic schemas for inputs, outputs, config, metadata, and errors. - **`providers/`** – LangChain-based LLM provider factory (OpenAI, Google, Anthropic). - **`settings/`** – TOML settings loader for evaluator configuration. -- **`config.py`** – Re-exports config types (`EvaluatorConfig`, `PromptProviderConfig`, factory functions). +- **`config.py`** – Re-exports config types (`EvaluatorConfig`, `LLMProviderConfig`, factory functions). - **`errors.py`** – Re-exports error types. - **`logger.py`** – Standard Python logging utilities. @@ -138,7 +138,7 @@ import logging from learning_commons_evaluators import ( ConventionalityEvaluator, ConventionalityEvaluationInput, - GooglePromptProviderConfig, + GoogleLLMProviderConfig, create_config, ) @@ -147,7 +147,7 @@ logging.basicConfig(level=logging.INFO) # Create config with provider credentials config = create_config( - google_prompt_provider_config=GooglePromptProviderConfig(api_key="your-google-key"), + google_llm_provider_config=GoogleLLMProviderConfig(api_key="your-google-key"), telemetry_partner_id="your-telemetry-id", ) @@ -175,12 +175,12 @@ Evaluates text for conventionality of language (idioms, metaphors, implied meani from learning_commons_evaluators import ( ConventionalityEvaluator, ConventionalityEvaluationInput, - GooglePromptProviderConfig, + GoogleLLMProviderConfig, create_config, ) config = create_config( - google_prompt_provider_config=GooglePromptProviderConfig(api_key="..."), + google_llm_provider_config=GoogleLLMProviderConfig(api_key="..."), telemetry_partner_id="your-telemetry-id", ) evaluator = ConventionalityEvaluator(config) @@ -205,14 +205,14 @@ Each LLM provider requires its own config with an API key: ```python from learning_commons_evaluators import ( - GooglePromptProviderConfig, - OpenAIPromptProviderConfig, - AnthropicPromptProviderConfig, + GoogleLLMProviderConfig, + OpenAILLMProviderConfig, + AnthropicLLMProviderConfig, ) -google_config = GooglePromptProviderConfig(api_key="...") -openai_config = OpenAIPromptProviderConfig(api_key="...") -anthropic_config = AnthropicPromptProviderConfig(api_key="...") +google_config = GoogleLLMProviderConfig(api_key="...") +openai_config = OpenAILLMProviderConfig(api_key="...") +anthropic_config = AnthropicLLMProviderConfig(api_key="...") ``` ### EvaluatorConfig @@ -223,8 +223,8 @@ Use `create_config()` to create an `EvaluatorConfig`: from learning_commons_evaluators import create_config config = create_config( - google_prompt_provider_config=google_config, - openai_prompt_provider_config=openai_config, + google_llm_provider_config=google_config, + openai_llm_provider_config=openai_config, telemetry_partner_id="your-telemetry-id", logger=my_logger, # Optional: any standard logging.Logger (default: package logger) ) diff --git a/sdks/python/src/learning_commons_evaluators/__init__.py b/sdks/python/src/learning_commons_evaluators/__init__.py index b3208dc..cccf241 100644 --- a/sdks/python/src/learning_commons_evaluators/__init__.py +++ b/sdks/python/src/learning_commons_evaluators/__init__.py @@ -4,11 +4,11 @@ # Config from learning_commons_evaluators.config import ( - AnthropicPromptProviderConfig, + AnthropicLLMProviderConfig, EvaluatorConfig, - GooglePromptProviderConfig, - OpenAIPromptProviderConfig, - PromptProviderConfig, + GoogleLLMProviderConfig, + LLMProviderConfig, + OpenAILLMProviderConfig, PromptSettings, TelemetryConfig, create_config, @@ -55,7 +55,7 @@ GradeInputField, TextInputField, ) -from learning_commons_evaluators.schemas.config import EvaluationSettings, LlmProvider +from learning_commons_evaluators.schemas.config import EvaluationSettings, LLMProvider from learning_commons_evaluators.schemas.conventionality import ( ConventionalityEvaluationSettings, ConventionalityOutput, @@ -95,7 +95,7 @@ "__description__", "__version__", "APIError", - "AnthropicPromptProviderConfig", + "AnthropicLLMProviderConfig", "AuthenticationError", "BaseEvaluator", "ConfigurationError", @@ -115,7 +115,7 @@ "EvaluatorMetadata", "EvaluatorRetryableError", "EvaluatorTimeoutError", - "GooglePromptProviderConfig", + "GoogleLLMProviderConfig", "AnyInputSpec", "GradeInputField", "GradeInputSpec", @@ -123,12 +123,12 @@ "InputSpec", "InputT", "TextInputSpec", - "LlmProvider", + "LLMProvider", + "LLMProviderConfig", "Logger", "NetworkError", - "OpenAIPromptProviderConfig", + "OpenAILLMProviderConfig", "OutputT", - "PromptProviderConfig", "PromptSettings", "RateLimitError", "SDK_LOGGER_NAME", diff --git a/sdks/python/src/learning_commons_evaluators/config.py b/sdks/python/src/learning_commons_evaluators/config.py index dc5995f..66e5d88 100644 --- a/sdks/python/src/learning_commons_evaluators/config.py +++ b/sdks/python/src/learning_commons_evaluators/config.py @@ -1,11 +1,11 @@ """Re-export evaluator config from schemas.config for package-level imports.""" from learning_commons_evaluators.schemas.config import ( - AnthropicPromptProviderConfig, + AnthropicLLMProviderConfig, EvaluatorConfig, - GooglePromptProviderConfig, - OpenAIPromptProviderConfig, - PromptProviderConfig, + GoogleLLMProviderConfig, + LLMProviderConfig, + OpenAILLMProviderConfig, PromptSettings, TelemetryConfig, create_config, @@ -14,11 +14,11 @@ ) __all__ = [ - "AnthropicPromptProviderConfig", + "AnthropicLLMProviderConfig", "EvaluatorConfig", - "GooglePromptProviderConfig", - "OpenAIPromptProviderConfig", - "PromptProviderConfig", + "GoogleLLMProviderConfig", + "OpenAILLMProviderConfig", + "LLMProviderConfig", "PromptSettings", "TelemetryConfig", "create_config", diff --git a/sdks/python/src/learning_commons_evaluators/evaluators/__init__.py b/sdks/python/src/learning_commons_evaluators/evaluators/__init__.py index 5bb9fc1..83211f7 100644 --- a/sdks/python/src/learning_commons_evaluators/evaluators/__init__.py +++ b/sdks/python/src/learning_commons_evaluators/evaluators/__init__.py @@ -1,6 +1,6 @@ """Evaluator implementations. -Config types (``EvaluatorConfig``, ``GooglePromptProviderConfig``, etc.) are +Config types (``EvaluatorConfig``, ``GoogleLLMProviderConfig``, etc.) are exported from the top-level :mod:`learning_commons_evaluators` package, not from here. Import evaluator classes directly from this sub-package only when you want to be explicit about the source. diff --git a/sdks/python/src/learning_commons_evaluators/providers/__init__.py b/sdks/python/src/learning_commons_evaluators/providers/__init__.py index f30b238..fc42968 100644 --- a/sdks/python/src/learning_commons_evaluators/providers/__init__.py +++ b/sdks/python/src/learning_commons_evaluators/providers/__init__.py @@ -2,8 +2,8 @@ create_provider() returns a langchain_core BaseChatModel (OpenAI, Google, Anthropic). -Config types (PromptProviderConfig and provider-specific configs: -AnthropicPromptProviderConfig, GooglePromptProviderConfig, OpenAIPromptProviderConfig) +Config types (LLMProviderConfig and provider-specific configs: +AnthropicLLMProviderConfig, GoogleLLMProviderConfig, OpenAILLMProviderConfig) live in learning_commons_evaluators.schemas.config. """ diff --git a/sdks/python/src/learning_commons_evaluators/providers/langchain.py b/sdks/python/src/learning_commons_evaluators/providers/langchain.py index 9496831..27a9cc8 100644 --- a/sdks/python/src/learning_commons_evaluators/providers/langchain.py +++ b/sdks/python/src/learning_commons_evaluators/providers/langchain.py @@ -14,7 +14,7 @@ from learning_commons_evaluators.errors import ConfigurationError from learning_commons_evaluators.schemas.config import ( EvaluatorConfig, - LlmProvider, + LLMProvider, PromptSettings, ) from learning_commons_evaluators.schemas.metadata import TokenUsage @@ -48,7 +48,7 @@ def _create_openai_llm( ) -> BaseChatModel: from langchain_openai import ChatOpenAI - openai_config = evaluator_config.openai_prompt_provider_config + openai_config = evaluator_config.openai_llm_provider_config if openai_config is None: raise ConfigurationError("OpenAI provider config is not set on EvaluatorConfig") kwargs: dict[str, Any] = { @@ -64,7 +64,7 @@ def _create_google_llm( ) -> BaseChatModel: from langchain_google_genai import ChatGoogleGenerativeAI - google_config = evaluator_config.google_prompt_provider_config + google_config = evaluator_config.google_llm_provider_config if google_config is None: raise ConfigurationError("Google provider config is not set on EvaluatorConfig") return ChatGoogleGenerativeAI( @@ -79,7 +79,7 @@ def _create_anthropic_llm( ) -> BaseChatModel: from langchain_anthropic import ChatAnthropic - anthropic_config = evaluator_config.anthropic_prompt_provider_config + anthropic_config = evaluator_config.anthropic_llm_provider_config if anthropic_config is None: raise ConfigurationError("Anthropic provider config is not set on EvaluatorConfig") return ChatAnthropic( @@ -93,11 +93,11 @@ def create_provider( prompt_settings: PromptSettings, evaluator_config: EvaluatorConfig ) -> BaseChatModel: """Create a LangChain chat model from a PromptSettings for use in a chain.""" - if prompt_settings.provider_type == LlmProvider.OPENAI: + if prompt_settings.provider_type == LLMProvider.OPENAI: return _create_openai_llm(prompt_settings, evaluator_config) - if prompt_settings.provider_type == LlmProvider.GOOGLE: + if prompt_settings.provider_type == LLMProvider.GOOGLE: return _create_google_llm(prompt_settings, evaluator_config) - if prompt_settings.provider_type == LlmProvider.ANTHROPIC: + if prompt_settings.provider_type == LLMProvider.ANTHROPIC: return _create_anthropic_llm(prompt_settings, evaluator_config) raise ConfigurationError( f"Unsupported LLM provider type: {prompt_settings.provider_type!r}. " diff --git a/sdks/python/src/learning_commons_evaluators/schemas/__init__.py b/sdks/python/src/learning_commons_evaluators/schemas/__init__.py index 5070c69..b168e59 100644 --- a/sdks/python/src/learning_commons_evaluators/schemas/__init__.py +++ b/sdks/python/src/learning_commons_evaluators/schemas/__init__.py @@ -6,7 +6,7 @@ ) from learning_commons_evaluators.schemas.config import ( EvaluationSettings, - LlmProvider, + LLMProvider, PromptSettings, ) from learning_commons_evaluators.schemas.conventionality import ( @@ -61,7 +61,7 @@ "GradeInputField", "InputField", "InputMetadata", - "LlmProvider", + "LLMProvider", "PromptSettings", "PROMPT_STEP_EXTRA_PROMPT_SETTINGS", "PROMPT_STEP_EXTRA_TOKEN_USAGE", diff --git a/sdks/python/src/learning_commons_evaluators/schemas/config.py b/sdks/python/src/learning_commons_evaluators/schemas/config.py index 94e44f2..8d1d035 100644 --- a/sdks/python/src/learning_commons_evaluators/schemas/config.py +++ b/sdks/python/src/learning_commons_evaluators/schemas/config.py @@ -1,7 +1,7 @@ """ Evaluator configuration and metadata. -Prompt provider configs (base, Google, OpenAI, Anthropic), EvaluatorConfig, and evaluator +LLM provider configs (base, Google, OpenAI, Anthropic), EvaluatorConfig, and evaluator metadata live here. Evaluator config is created via factory methods (create_config, create_config_no_telemetry, create_config_telemetry_with_full_input). """ @@ -13,11 +13,10 @@ from learning_commons_evaluators.logger import Logger, get_logger -# --- Prompt provider configs (for LLM calls in prompt steps) --- +# --- LLM provider configs (for LLM calls in prompt steps) --- -# TODO: rename to LLMProvider -class LlmProvider(str, Enum): +class LLMProvider(str, Enum): """LLM provider identifier. Subclass of str so it compares and serializes as the provider name.""" ANTHROPIC = "anthropic" @@ -25,27 +24,26 @@ class LlmProvider(str, Enum): OPENAI = "openai" -# TODO: rename to LLMProviderConfig and subclasses to GoogleLLMProviderConfig, OpenAILLMProviderConfig, AnthropicLLMProviderConfig. @dataclass(frozen=True) -class PromptProviderConfig: - """Base type for prompt provider configuration.""" +class LLMProviderConfig: + """Base type for LLM provider configuration.""" api_key: str - type: LlmProvider + type: LLMProvider @dataclass(frozen=True) -class GooglePromptProviderConfig(PromptProviderConfig): - """Google (Gemini) prompt provider config. Takes an API key.""" +class GoogleLLMProviderConfig(LLMProviderConfig): + """Google (Gemini) LLM provider config. Takes an API key.""" - type: LlmProvider = LlmProvider.GOOGLE + type: LLMProvider = LLMProvider.GOOGLE @dataclass(frozen=True) -class OpenAIPromptProviderConfig(PromptProviderConfig): - """OpenAI prompt provider config. Takes an API key.""" +class OpenAILLMProviderConfig(LLMProviderConfig): + """OpenAI LLM provider config. Takes an API key.""" - type: LlmProvider = LlmProvider.OPENAI + type: LLMProvider = LLMProvider.OPENAI # TODO: verify base_url functionality before enabling # base_url: str | None = ( # None # Optional; for OpenAI-compatible endpoints (e.g. Azure, proxy). Used only when type is OPENAI. @@ -53,17 +51,17 @@ class OpenAIPromptProviderConfig(PromptProviderConfig): @dataclass(frozen=True) -class AnthropicPromptProviderConfig(PromptProviderConfig): - """Anthropic (Claude) prompt provider config. Takes an API key.""" +class AnthropicLLMProviderConfig(LLMProviderConfig): + """Anthropic (Claude) LLM provider config. Takes an API key.""" - type: LlmProvider = LlmProvider.ANTHROPIC + type: LLMProvider = LLMProvider.ANTHROPIC @dataclass(frozen=True) class PromptSettings: """Settings for a prompt step: provider, model, temperature.""" - provider_type: LlmProvider + provider_type: LLMProvider model: str temperature: float @@ -91,7 +89,7 @@ class TelemetryConfig: @dataclass(frozen=True) class EvaluatorConfig: """ - Config for creating an evaluator: prompt provider configs, logger, telemetry. + Config for creating an evaluator: LLM provider configs, logger, telemetry. Logger defaults to the SDK package logger (``learning_commons_evaluators``), so log records propagate like typical library loggers. Pass ``logger=`` to use a @@ -102,27 +100,27 @@ class EvaluatorConfig: create_config_telemetry_with_full_input, then pass the config to the evaluator constructor. """ - google_prompt_provider_config: GooglePromptProviderConfig | None = None - openai_prompt_provider_config: OpenAIPromptProviderConfig | None = None - anthropic_prompt_provider_config: AnthropicPromptProviderConfig | None = None + google_llm_provider_config: GoogleLLMProviderConfig | None = None + openai_llm_provider_config: OpenAILLMProviderConfig | None = None + anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None logger: Logger = field(default_factory=get_logger) telemetry: TelemetryConfig = field(default_factory=TelemetryConfig) def create_config( *, - google_prompt_provider_config: GooglePromptProviderConfig | None = None, - openai_prompt_provider_config: OpenAIPromptProviderConfig | None = None, - anthropic_prompt_provider_config: AnthropicPromptProviderConfig | None = None, + google_llm_provider_config: GoogleLLMProviderConfig | None = None, + openai_llm_provider_config: OpenAILLMProviderConfig | None = None, + anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None, logger: Logger | None = None, telemetry_partner_id: str, send_full_input_with_telemetry: bool = False, ) -> EvaluatorConfig: """Create evaluator config with telemetry. telemetry_partner_id is required.""" return EvaluatorConfig( - google_prompt_provider_config=google_prompt_provider_config, - openai_prompt_provider_config=openai_prompt_provider_config, - anthropic_prompt_provider_config=anthropic_prompt_provider_config, + google_llm_provider_config=google_llm_provider_config, + openai_llm_provider_config=openai_llm_provider_config, + anthropic_llm_provider_config=anthropic_llm_provider_config, logger=get_logger() if logger is None else logger, telemetry=TelemetryConfig( telemetry_partner_id=telemetry_partner_id, @@ -133,16 +131,16 @@ def create_config( def create_config_no_telemetry( *, - google_prompt_provider_config: GooglePromptProviderConfig | None = None, - openai_prompt_provider_config: OpenAIPromptProviderConfig | None = None, - anthropic_prompt_provider_config: AnthropicPromptProviderConfig | None = None, + google_llm_provider_config: GoogleLLMProviderConfig | None = None, + openai_llm_provider_config: OpenAILLMProviderConfig | None = None, + anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None, logger: Logger | None = None, ) -> EvaluatorConfig: """Create evaluator config with telemetry disabled.""" return EvaluatorConfig( - google_prompt_provider_config=google_prompt_provider_config, - openai_prompt_provider_config=openai_prompt_provider_config, - anthropic_prompt_provider_config=anthropic_prompt_provider_config, + google_llm_provider_config=google_llm_provider_config, + openai_llm_provider_config=openai_llm_provider_config, + anthropic_llm_provider_config=anthropic_llm_provider_config, logger=get_logger() if logger is None else logger, telemetry=TelemetryConfig(telemetry_partner_id=None, send_full_input_with_telemetry=False), ) @@ -150,17 +148,17 @@ def create_config_no_telemetry( def create_config_telemetry_with_full_input( *, - google_prompt_provider_config: GooglePromptProviderConfig | None = None, - openai_prompt_provider_config: OpenAIPromptProviderConfig | None = None, - anthropic_prompt_provider_config: AnthropicPromptProviderConfig | None = None, + google_llm_provider_config: GoogleLLMProviderConfig | None = None, + openai_llm_provider_config: OpenAILLMProviderConfig | None = None, + anthropic_llm_provider_config: AnthropicLLMProviderConfig | None = None, logger: Logger | None = None, telemetry_partner_id: str, ) -> EvaluatorConfig: """Create evaluator config with telemetry and full input sent with telemetry.""" return EvaluatorConfig( - google_prompt_provider_config=google_prompt_provider_config, - openai_prompt_provider_config=openai_prompt_provider_config, - anthropic_prompt_provider_config=anthropic_prompt_provider_config, + google_llm_provider_config=google_llm_provider_config, + openai_llm_provider_config=openai_llm_provider_config, + anthropic_llm_provider_config=anthropic_llm_provider_config, logger=get_logger() if logger is None else logger, telemetry=TelemetryConfig( telemetry_partner_id=telemetry_partner_id, send_full_input_with_telemetry=True diff --git a/sdks/python/src/learning_commons_evaluators/schemas/metadata.py b/sdks/python/src/learning_commons_evaluators/schemas/metadata.py index 26de4dd..dd1c654 100644 --- a/sdks/python/src/learning_commons_evaluators/schemas/metadata.py +++ b/sdks/python/src/learning_commons_evaluators/schemas/metadata.py @@ -7,7 +7,7 @@ from pydantic import BaseModel, Field, TypeAdapter, field_validator, model_validator from learning_commons_evaluators._version import __version__ as sdk_version -from learning_commons_evaluators.schemas.config import LlmProvider, PromptSettings +from learning_commons_evaluators.schemas.config import LLMProvider, PromptSettings from learning_commons_evaluators.schemas.input_specs import AnyInputSpec @@ -79,7 +79,7 @@ def _normalize_maturity(cls, v: Any) -> Any: class TokenUsage(BaseModel): """Token usage for a some step of an evaluation: provider type, model, and token counts.""" - provider_type: LlmProvider + provider_type: LLMProvider model: str input_tokens: int output_tokens: int @@ -125,6 +125,6 @@ class EvaluationMetadata(BaseModel): input_metadata: InputMetadata status: Status = Status.processing error_details: str | None = None - total_token_usage: dict[LlmProvider, TokenUsage] = Field(default_factory=dict) + total_token_usage: dict[LLMProvider, TokenUsage] = Field(default_factory=dict) processing_time_ms: float = 0 step_details: dict[str, StepMetadata] = Field(default_factory=dict) diff --git a/sdks/python/src/learning_commons_evaluators/settings/_generated_conventionality_settings.py b/sdks/python/src/learning_commons_evaluators/settings/_generated_conventionality_settings.py index a3c62b7..a83b9b1 100644 --- a/sdks/python/src/learning_commons_evaluators/settings/_generated_conventionality_settings.py +++ b/sdks/python/src/learning_commons_evaluators/settings/_generated_conventionality_settings.py @@ -5,7 +5,7 @@ from __future__ import annotations -from learning_commons_evaluators.schemas.config import LlmProvider, PromptSettings +from learning_commons_evaluators.schemas.config import LLMProvider, PromptSettings from learning_commons_evaluators.schemas.conventionality import ConventionalityEvaluationSettings from learning_commons_evaluators.schemas.input_specs import GradeInputSpec, TextInputSpec from learning_commons_evaluators.schemas.metadata import EvaluatorMaturity, EvaluatorMetadata @@ -86,7 +86,7 @@ _EVALUATION_SETTINGS = ConventionalityEvaluationSettings( prompt_settings_step_conventionality_evaluation=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model='gemini-3-flash-preview', temperature=0.0, ), diff --git a/sdks/python/src/learning_commons_evaluators/settings/_generated_vocabulary_settings.py b/sdks/python/src/learning_commons_evaluators/settings/_generated_vocabulary_settings.py index 82562f8..5ad022c 100644 --- a/sdks/python/src/learning_commons_evaluators/settings/_generated_vocabulary_settings.py +++ b/sdks/python/src/learning_commons_evaluators/settings/_generated_vocabulary_settings.py @@ -5,7 +5,7 @@ from __future__ import annotations -from learning_commons_evaluators.schemas.config import LlmProvider, PromptSettings +from learning_commons_evaluators.schemas.config import LLMProvider, PromptSettings from learning_commons_evaluators.schemas.input_specs import GradeInputSpec, TextInputSpec from learning_commons_evaluators.schemas.metadata import EvaluatorMaturity, EvaluatorMetadata from learning_commons_evaluators.schemas.vocabulary import VocabularyEvaluationSettings @@ -383,16 +383,16 @@ _EVALUATION_SETTINGS = VocabularyEvaluationSettings( prompt_settings_step_background_knowledge=PromptSettings( - provider_type=LlmProvider.OPENAI, + provider_type=LLMProvider.OPENAI, model='gpt-4o-2024-11-20', temperature=0.0, ), prompt_settings_step_vocab_grades_3_4=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model='gemini-2.5-pro', temperature=0.0, ), - prompt_settings_step_vocab_other_grades=PromptSettings(provider_type=LlmProvider.OPENAI, model='gpt-4.1', temperature=0.0), + prompt_settings_step_vocab_other_grades=PromptSettings(provider_type=LLMProvider.OPENAI, model='gpt-4.1', temperature=0.0), ) # ── Public config object (imported by evaluator modules) ────────────────────── diff --git a/sdks/python/tests/evaluators/test_base.py b/sdks/python/tests/evaluators/test_base.py index 333e5c1..cc3f861 100644 --- a/sdks/python/tests/evaluators/test_base.py +++ b/sdks/python/tests/evaluators/test_base.py @@ -29,7 +29,7 @@ from learning_commons_evaluators.schemas.common_inputs import GradeInputField, TextInputField from learning_commons_evaluators.schemas.config import ( EvaluationSettings, - LlmProvider, + LLMProvider, PromptSettings, ) from learning_commons_evaluators.schemas.errors import APIError, EvaluatorError, ValidationError @@ -235,33 +235,33 @@ def emit(self, record: logging.LogRecord) -> None: class TestUpdateTotalTokenUsage: def test_inserts_usage_for_new_provider(self, stub_evaluator, evaluation_metadata): usage = TokenUsage( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", input_tokens=10, output_tokens=5, ) stub_evaluator.update_total_token_usage(usage, evaluation_metadata) - stored = evaluation_metadata.total_token_usage[LlmProvider.GOOGLE] + stored = evaluation_metadata.total_token_usage[LLMProvider.GOOGLE] assert stored.input_tokens == 10 assert stored.output_tokens == 5 def test_accumulates_usage_for_existing_provider(self, stub_evaluator, evaluation_metadata): - evaluation_metadata.total_token_usage[LlmProvider.GOOGLE] = TokenUsage( - provider_type=LlmProvider.GOOGLE, + evaluation_metadata.total_token_usage[LLMProvider.GOOGLE] = TokenUsage( + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", input_tokens=10, output_tokens=5, ) stub_evaluator.update_total_token_usage( TokenUsage( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", input_tokens=20, output_tokens=15, ), evaluation_metadata, ) - stored = evaluation_metadata.total_token_usage[LlmProvider.GOOGLE] + stored = evaluation_metadata.total_token_usage[LLMProvider.GOOGLE] assert stored.input_tokens == 30 assert stored.output_tokens == 20 @@ -326,7 +326,7 @@ def _fake_llm(_pv): out = ev.execute_prompt_chain_step( step_name="raw", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -345,7 +345,7 @@ def test_json_dict_normalizer_without_parser_type_raises( stub_evaluator.execute_prompt_chain_step( step_name="raw", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -365,7 +365,7 @@ def _fake_llm(_pv): result = stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -400,7 +400,7 @@ def _double(d: dict) -> dict: result = stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -434,7 +434,7 @@ def _fake_llm(_pv): result = stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -461,7 +461,7 @@ def _fake_llm(_pv): stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -487,7 +487,7 @@ def _fake_llm(_pv): stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -500,7 +500,7 @@ def _fake_llm(_pv): def test_prompt_settings_recorded_in_step_extras(self, stub_evaluator, evaluation_metadata): settings = PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ) @@ -532,7 +532,7 @@ def _llm_with_usage(_pv): ) settings = PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ) @@ -551,7 +551,7 @@ def _llm_with_usage(_pv): step = evaluation_metadata.step_details["main"] assert step.extras[PROMPT_STEP_EXTRA_TOKEN_USAGE]["input_tokens"] == 42 assert step.extras[PROMPT_STEP_EXTRA_TOKEN_USAGE]["output_tokens"] == 17 - assert evaluation_metadata.total_token_usage[LlmProvider.GOOGLE].input_tokens == 42 + assert evaluation_metadata.total_token_usage[LLMProvider.GOOGLE].input_tokens == 42 def test_propagates_configuration_error_from_create_provider( self, stub_evaluator, evaluation_metadata @@ -567,7 +567,7 @@ def test_propagates_configuration_error_from_create_provider( stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -587,7 +587,7 @@ def test_propagates_evaluator_error_without_wrapping(self, stub_evaluator, evalu stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -609,7 +609,7 @@ def _boom(_pv): stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -633,7 +633,7 @@ def _bad(_pv): stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), @@ -659,7 +659,7 @@ def _partial(_pv): stub_evaluator.execute_prompt_chain_step( step_name="main", prompt_settings=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ), diff --git a/sdks/python/tests/evaluators/test_conventionality.py b/sdks/python/tests/evaluators/test_conventionality.py index e339da0..b3e4380 100644 --- a/sdks/python/tests/evaluators/test_conventionality.py +++ b/sdks/python/tests/evaluators/test_conventionality.py @@ -47,7 +47,7 @@ def test_evaluate_returns_evaluation_result(self): def test_evaluate_with_explicit_settings(self): from learning_commons_evaluators.schemas.config import ( - LlmProvider, + LLMProvider, PromptSettings, ) from learning_commons_evaluators.schemas.conventionality import ( @@ -58,7 +58,7 @@ def test_evaluate_with_explicit_settings(self): evaluator = ConventionalityEvaluator(config) settings = ConventionalityEvaluationSettings( prompt_settings_step_conventionality_evaluation=PromptSettings( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0, ) diff --git a/sdks/python/tests/providers/test_langchain.py b/sdks/python/tests/providers/test_langchain.py index b9f9c31..0b0ff04 100644 --- a/sdks/python/tests/providers/test_langchain.py +++ b/sdks/python/tests/providers/test_langchain.py @@ -11,9 +11,9 @@ ) from learning_commons_evaluators.schemas.config import ( EvaluatorConfig, - GooglePromptProviderConfig, - LlmProvider, - OpenAIPromptProviderConfig, + GoogleLLMProviderConfig, + LLMProvider, + OpenAILLMProviderConfig, PromptSettings, ) @@ -21,9 +21,9 @@ def _config(**kwargs) -> EvaluatorConfig: """Return an EvaluatorConfig with all providers set to None; pass provider kwargs to override.""" defaults: dict = { - "google_prompt_provider_config": None, - "openai_prompt_provider_config": None, - "anthropic_prompt_provider_config": None, + "google_llm_provider_config": None, + "openai_llm_provider_config": None, + "anthropic_llm_provider_config": None, } defaults.update(kwargs) return EvaluatorConfig(**defaults) @@ -36,38 +36,34 @@ def _config(**kwargs) -> EvaluatorConfig: class TestCreateProvider: def test_google_provider_returns_model(self): - config = _config( - google_prompt_provider_config=GooglePromptProviderConfig(api_key="test-key") - ) + config = _config(google_llm_provider_config=GoogleLLMProviderConfig(api_key="test-key")) settings = PromptSettings( - provider_type=LlmProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0 + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0 ) assert create_provider(settings, config) is not None def test_openai_provider_returns_model(self): - config = _config( - openai_prompt_provider_config=OpenAIPromptProviderConfig(api_key="test-key") - ) + config = _config(openai_llm_provider_config=OpenAILLMProviderConfig(api_key="test-key")) settings = PromptSettings( - provider_type=LlmProvider.OPENAI, model="gpt-4o-mini", temperature=0.0 + provider_type=LLMProvider.OPENAI, model="gpt-4o-mini", temperature=0.0 ) assert create_provider(settings, config) is not None def test_raises_when_google_config_missing(self): settings = PromptSettings( - provider_type=LlmProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0 + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0 ) with pytest.raises(ConfigurationError, match="Google provider config is not set"): create_provider(settings, _config()) def test_raises_when_openai_config_missing(self): - settings = PromptSettings(provider_type=LlmProvider.OPENAI, model="gpt-4o", temperature=0.0) + settings = PromptSettings(provider_type=LLMProvider.OPENAI, model="gpt-4o", temperature=0.0) with pytest.raises(ConfigurationError, match="OpenAI provider config is not set"): create_provider(settings, _config()) def test_raises_when_anthropic_config_missing(self): settings = PromptSettings( - provider_type=LlmProvider.ANTHROPIC, model="claude-3", temperature=0.0 + provider_type=LLMProvider.ANTHROPIC, model="claude-3", temperature=0.0 ) with pytest.raises(ConfigurationError, match="Anthropic provider config is not set"): create_provider(settings, _config()) @@ -79,7 +75,7 @@ def test_raises_configuration_error_for_unrecognized_provider_type(self): with pytest.raises(ConfigurationError, match="Unsupported LLM provider type"): create_provider( mock_settings, - _config(google_prompt_provider_config=GooglePromptProviderConfig(api_key="k")), + _config(google_llm_provider_config=GoogleLLMProviderConfig(api_key="k")), ) @@ -91,23 +87,23 @@ def test_raises_configuration_error_for_unrecognized_provider_type(self): class TestTokenUsageFromAIMessage: def test_returns_zero_usage_when_no_usage_metadata(self): settings = PromptSettings( - provider_type=LlmProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0 + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", temperature=0.0 ) usage = token_usage_from_aimessage(object(), settings) - assert usage.provider_type == LlmProvider.GOOGLE + assert usage.provider_type == LLMProvider.GOOGLE assert usage.model == "gemini-2.0-flash" assert usage.input_tokens == 0 assert usage.output_tokens == 0 def test_uses_usage_metadata_when_present(self): - settings = PromptSettings(provider_type=LlmProvider.OPENAI, model="gpt-4o", temperature=0.0) + settings = PromptSettings(provider_type=LLMProvider.OPENAI, model="gpt-4o", temperature=0.0) message = type("Msg", (), {"usage_metadata": {"input_tokens": 100, "output_tokens": 50}})() usage = token_usage_from_aimessage(message, settings) assert usage.input_tokens == 100 assert usage.output_tokens == 50 def test_falls_back_to_response_metadata_when_usage_metadata_absent(self): - settings = PromptSettings(provider_type=LlmProvider.GOOGLE, model="gemini", temperature=0.0) + settings = PromptSettings(provider_type=LLMProvider.GOOGLE, model="gemini", temperature=0.0) message = type( "Msg", (), diff --git a/sdks/python/tests/schemas/test_config.py b/sdks/python/tests/schemas/test_config.py index 6a50b09..92942f4 100644 --- a/sdks/python/tests/schemas/test_config.py +++ b/sdks/python/tests/schemas/test_config.py @@ -1,4 +1,4 @@ -"""Tests for EvaluatorConfig, PromptProviderConfig subclasses, and factory functions.""" +"""Tests for EvaluatorConfig, LLMProviderConfig subclasses, and factory functions.""" from dataclasses import FrozenInstanceError @@ -6,10 +6,10 @@ from learning_commons_evaluators.logger import SDK_LOGGER_NAME, get_logger from learning_commons_evaluators.schemas.config import ( - AnthropicPromptProviderConfig, - GooglePromptProviderConfig, - LlmProvider, - OpenAIPromptProviderConfig, + AnthropicLLMProviderConfig, + GoogleLLMProviderConfig, + LLMProvider, + OpenAILLMProviderConfig, TelemetryConfig, create_config, create_config_no_telemetry, @@ -17,26 +17,26 @@ ) -class TestLlmProvider: +class TestLLMProvider: @pytest.mark.parametrize( "member,value", [ - (LlmProvider.ANTHROPIC, "anthropic"), - (LlmProvider.GOOGLE, "google"), - (LlmProvider.OPENAI, "openai"), + (LLMProvider.ANTHROPIC, "anthropic"), + (LLMProvider.GOOGLE, "google"), + (LLMProvider.OPENAI, "openai"), ], ) def test_provider_value(self, member, value): assert member.value == value -class TestPromptProviderConfigs: +class TestLLMProviderConfigs: @pytest.mark.parametrize( "cls,expected_type", [ - (GooglePromptProviderConfig, LlmProvider.GOOGLE), - (OpenAIPromptProviderConfig, LlmProvider.OPENAI), - (AnthropicPromptProviderConfig, LlmProvider.ANTHROPIC), + (GoogleLLMProviderConfig, LLMProvider.GOOGLE), + (OpenAILLMProviderConfig, LLMProvider.OPENAI), + (AnthropicLLMProviderConfig, LLMProvider.ANTHROPIC), ], ) def test_provider_config_default_type(self, cls, expected_type): @@ -54,11 +54,11 @@ def test_create_config_no_telemetry_defaults(self): def test_create_config_no_telemetry_accepts_providers(self): config = create_config_no_telemetry( - google_prompt_provider_config=GooglePromptProviderConfig(api_key="gk"), - openai_prompt_provider_config=OpenAIPromptProviderConfig(api_key="ok"), + google_llm_provider_config=GoogleLLMProviderConfig(api_key="gk"), + openai_llm_provider_config=OpenAILLMProviderConfig(api_key="ok"), ) - assert config.google_prompt_provider_config.api_key == "gk" - assert config.openai_prompt_provider_config.api_key == "ok" + assert config.google_llm_provider_config.api_key == "gk" + assert config.openai_llm_provider_config.api_key == "ok" def test_create_config_sets_telemetry_partner_id(self): config = create_config(telemetry_partner_id="tid-123") diff --git a/sdks/python/tests/schemas/test_metadata.py b/sdks/python/tests/schemas/test_metadata.py index 6ddbd81..67e9c35 100644 --- a/sdks/python/tests/schemas/test_metadata.py +++ b/sdks/python/tests/schemas/test_metadata.py @@ -9,7 +9,7 @@ import pytest from pydantic import ValidationError -from learning_commons_evaluators.schemas.config import LlmProvider, PromptSettings +from learning_commons_evaluators.schemas.config import LLMProvider, PromptSettings from learning_commons_evaluators.schemas.input_specs import TextInputSpec from learning_commons_evaluators.schemas.metadata import ( PROMPT_STEP_EXTRA_PROMPT_SETTINGS, @@ -98,18 +98,18 @@ def test_model_validate_unknown_input_type_raises(self): def test_fields(self): usage = TokenUsage( - provider_type=LlmProvider.GOOGLE, + provider_type=LLMProvider.GOOGLE, model="gemini-2.0-flash", input_tokens=100, output_tokens=50, ) - assert usage.provider_type == LlmProvider.GOOGLE + assert usage.provider_type == LLMProvider.GOOGLE assert usage.input_tokens == 100 assert usage.output_tokens == 50 def test_zero_tokens_are_valid(self): usage = TokenUsage( - provider_type=LlmProvider.OPENAI, + provider_type=LLMProvider.OPENAI, model="gpt-4o-mini", input_tokens=0, output_tokens=0, @@ -139,7 +139,7 @@ class TestPromptSettingsToExtrasValue: def test_produces_json_serialisable_dict(self): """provider_type must be a plain string (not the enum) so the dict is JSON-safe.""" settings = PromptSettings( - provider_type=LlmProvider.ANTHROPIC, + provider_type=LLMProvider.ANTHROPIC, model="claude-3-haiku", temperature=0.5, ) diff --git a/sdks/python/tests/settings/test_load_settings.py b/sdks/python/tests/settings/test_load_settings.py index 25dc88b..8fd3882 100644 --- a/sdks/python/tests/settings/test_load_settings.py +++ b/sdks/python/tests/settings/test_load_settings.py @@ -12,7 +12,7 @@ from learning_commons_evaluators.schemas.config import ( EvaluationSettings, - LlmProvider, + LLMProvider, PromptSettings, ) from learning_commons_evaluators.schemas.conventionality import ( @@ -200,7 +200,7 @@ def test_load_evaluator_settings_prompt_settings_and_prompts(tmp_path: Path) -> ).strip() ) result = load_evaluator_settings(path, _WithPromptSettings) - assert result.evaluation_settings.prompt_settings_main.provider_type == LlmProvider.GOOGLE + assert result.evaluation_settings.prompt_settings_main.provider_type == LLMProvider.GOOGLE assert result.evaluation_settings.prompt_settings_main.model == "gemini-2.0-flash" assert result.evaluation_settings.prompt_settings_main.temperature == 0.25 assert result.prompts["system_prompt"] == "hello\n\nworld"