Source code for factly.settings

"""
Settings module for Factly CLI.

Defines configuration models for API, inference, and overall application settings.
"""

from __future__ import annotations

from typing import Optional

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


[docs] class ModelSettings(BaseSettings): """ Configuration for the LLM API connection and model selection. Attributes: api_base (str): Base URL for the model API endpoint. model (str): Model name or identifier (e.g., 'gpt-4o'). api_key (Optional[str]): API key for authenticating with the model provider. Set to None for local models that don't require authentication. """ api_base: str = Field(default="https://api.openai.com/v1") model: str = Field(default="gpt-4o") api_key: Optional[str] = Field(default=None) model_config = SettingsConfigDict( env_file=".env", env_file_encoding="utf-8", env_prefix="OPENAI_", extra="ignore", )
[docs] @classmethod def create(cls, **kwargs) -> "ModelSettings": """ Create a ModelSettings instance with optional overrides. Args: **kwargs: Override default settings values. Returns: ModelSettings: A settings instance. """ return cls(**kwargs)
[docs] class InferenceSettings(BaseSettings): """ Inference-time parameters for LLM decoding, following MMLU best practices. Attributes: temperature (float): Sampling temperature. Default set to 0.0 to ensure deterministic, reproducible outputs by disabling sampling randomness. top_p (float): Nucleus sampling parameter. Controls how much of the probability mass the model is allowed to sample from. Default set to 1.0 to disable nucleus sampling, guaranteeing the model always selects the most probable token. max_tokens (int): Maximum tokens to generate. Default set to 256 to allow sufficient space for model reasoning. For standard MMLU, you typically want just 1 token (A/B/C/D answers), but setting max_tokens: 1 will break benchmarks if your prompts expect structured outputs (e.g., JSON) or encourage reasoning before answering. With higher max_tokens, you may need to post-process results to extract final answers. n_shots (int): Number of examples for few-shot learning. Default set to 0 for zero-shot evaluation. Increasing this value provides more demonstration examples in prompts to help the model understand the task format. Note: When using n_shots > 0, consider setting max_tokens > 1 to allow the model to follow the reasoning patterns demonstrated in few-shot examples. Setting max_tokens=1 with n_shots > 0 may cause the model to ignore the reasoning pattern in examples and only output a token. """ temperature: float = Field(default=0.0, ge=0.0, le=2.0) top_p: float = Field(default=1.0, gt=0.0, le=1.0) max_tokens: int = Field(default=256, gt=0) n_shots: int = Field(default=0, ge=0)
[docs] @classmethod def create(cls, **kwargs) -> "InferenceSettings": """ Create an InferenceSettings instance with optional overrides. Args: **kwargs: Override default settings values. Returns: InferenceSettings: A settings instance. """ return cls(**kwargs)
[docs] @classmethod def for_mmlu(cls, n_shots: int = 0) -> "InferenceSettings": """ Create inference settings configured for traditional MMLU benchmarking. Uses max_tokens=1 for single-letter answers, which is the canonical setup for standard MMLU evaluation where only a single token (A/B/C/D) is expected. Returns: InferenceSettings: MMLU-optimized settings (temperature=0, top_p=1, max_tokens=1). """ max_tokens = 256 if n_shots > 0 else 1 return cls(temperature=0.0, top_p=1.0, max_tokens=max_tokens, n_shots=n_shots)
[docs] class FactlySettings(BaseSettings): """ Aggregated settings for the Factly CLI, including model and inference configuration. Attributes: model (ModelSettings): Model API and authentication settings. inference (InferenceSettings): Inference-time decoding parameters. """ model: ModelSettings = Field(default_factory=lambda: ModelSettings()) inference: InferenceSettings = Field( default_factory=lambda: InferenceSettings.for_mmlu() )
[docs] @classmethod def create(cls, **kwargs) -> "FactlySettings": """ Create FactlySettings with optional overrides. This factory method handles nested configuration with dictionaries. Args: **kwargs: Configuration overrides including nested dictionaries for model and inference settings. Returns: FactlySettings: A settings instance. Example: >>> # Create with API key and custom temperature >>> settings = FactlySettings.create( ... model={"api_key": "sk-abc123", "model": "gpt-4o"}, ... inference={"temperature": 0.1} ... ) >>> >>> # Alternatively, update settings after creation: >>> settings = FactlySettings() >>> settings.model.api_key = "sk-abc123" >>> settings.inference.temperature = 0.1 """ return cls(**kwargs)
[docs] @classmethod def from_cli( cls, model: str | None = None, api_key: str | None = None, api_base: str | None = None, temperature: float | None = None, top_p: float | None = None, max_tokens: int | None = None, n_shots: int | None = None, ) -> "FactlySettings": """ Create settings by combining CLI arguments with environment variables. CLI arguments take precedence over environment variables and defaults. Only non-None CLI values will override settings from the environment. Args: model: Model name (e.g., "gpt-4o") api_key: API key for the model provider api_base: Base URL for the API temperature: Sampling temperature top_p: Nucleus sampling parameter max_tokens: Maximum tokens to generate n_shots: Number of examples for few-shot learning Returns: FactlySettings: Combined settings with proper priority """ inference_kwargs = {} if temperature is not None: inference_kwargs["temperature"] = temperature if top_p is not None: inference_kwargs["top_p"] = top_p if max_tokens is not None: inference_kwargs["max_tokens"] = max_tokens if n_shots is not None: inference_kwargs["n_shots"] = n_shots _inference = InferenceSettings.create(**inference_kwargs) model_kwargs = {} if api_key is not None: model_kwargs["api_key"] = api_key if api_base is not None: model_kwargs["api_base"] = api_base if model is not None: model_kwargs["model"] = model _model = ModelSettings.create(**model_kwargs) # First load settings from environment in the following order: settings = cls( model=_model, inference=_inference, ) if settings.inference.n_shots > 0 and settings.inference.max_tokens == 1: import warnings warnings.warn( "Using n_shots > 0 with max_tokens=1 may produce inconsistent results. " "Consider increasing max_tokens to allow for reasoning patterns " "demonstrated in few-shot examples." ) return settings