"""
Settings module for Factly CLI.
Defines configuration models for API, inference, and overall application settings.
"""
from __future__ import annotations
from typing import Optional
from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict
[docs]
class ModelSettings(BaseSettings):
    """
    Connection and model-selection settings for the LLM API.

    Besides keyword arguments, values may come from environment variables
    carrying the ``OPENAI_`` prefix or from a UTF-8 encoded ``.env`` file.
    Entries that do not correspond to a declared field are ignored.

    Attributes:
        api_base (str): Base URL for the model API endpoint.
        model (str): Model name or identifier (e.g., 'gpt-4o').
        api_key (Optional[str]): API key used to authenticate with the model
            provider. Leave as None for local models that need no
            authentication.
    """

    # Settings-source configuration: where values are read from and how
    # unknown keys are treated.
    model_config = SettingsConfigDict(
        extra="ignore",
        env_prefix="OPENAI_",
        env_file=".env",
        env_file_encoding="utf-8",
    )

    api_base: str = "https://api.openai.com/v1"
    model: str = "gpt-4o"
    api_key: Optional[str] = None

    @classmethod
    def create(cls, **kwargs) -> "ModelSettings":
        """
        Build a ModelSettings instance, applying any supplied overrides.

        Args:
            **kwargs: Field values that replace the defaults.

        Returns:
            ModelSettings: The constructed settings instance.
        """
        instance = cls(**kwargs)
        return instance
[docs]
class InferenceSettings(BaseSettings):
    """
    Inference-time parameters for LLM decoding, following MMLU best practices.

    Attributes:
        temperature (float): Sampling temperature, validated to lie in
            [0.0, 2.0]. Default set to 0.0 to ensure deterministic,
            reproducible outputs by disabling sampling randomness.
        top_p (float): Nucleus sampling parameter, validated to lie in
            (0.0, 1.0]. Controls how much of the probability mass the model is
            allowed to sample from. Default set to 1.0 so that no probability
            mass is cut off (nucleus filtering is effectively disabled); the
            deterministic behaviour comes from ``temperature=0.0``, not from
            this value.
        max_tokens (int): Maximum tokens to generate; must be greater than 0.
            Default set to 256 to allow sufficient space for model reasoning.
            For standard MMLU, you typically want just 1 token (A/B/C/D
            answers), but setting max_tokens to 1 will break benchmarks if your
            prompts expect structured outputs (e.g., JSON) or encourage
            reasoning before answering. With higher max_tokens, you may need
            to post-process results to extract final answers.
        n_shots (int): Number of examples for few-shot learning; must be
            greater than or equal to 0. Default set to 0 for zero-shot
            evaluation. Increasing this value provides more demonstration
            examples in prompts to help the model understand the task format.

    Note:
        When using n_shots > 0, consider setting max_tokens > 1 to allow the
        model to follow the reasoning patterns demonstrated in few-shot
        examples. Setting max_tokens=1 with n_shots > 0 may cause the model to
        ignore the reasoning pattern in examples and only output a token.
    """

    temperature: float = Field(default=0.0, ge=0.0, le=2.0)
    top_p: float = Field(default=1.0, gt=0.0, le=1.0)
    max_tokens: int = Field(default=256, gt=0)
    n_shots: int = Field(default=0, ge=0)

    @classmethod
    def create(cls, **kwargs) -> "InferenceSettings":
        """
        Create an InferenceSettings instance with optional overrides.

        Args:
            **kwargs: Override default settings values.

        Returns:
            InferenceSettings: A settings instance.
        """
        return cls(**kwargs)

    @classmethod
    def for_mmlu(cls, n_shots: int = 0) -> "InferenceSettings":
        """
        Create inference settings configured for MMLU benchmarking.

        Decoding is always pinned to temperature=0.0 and top_p=1.0. The token
        budget depends on ``n_shots``:

        * ``n_shots == 0``: max_tokens=1, the canonical setup for standard
          MMLU evaluation where only a single token (A/B/C/D) is expected.
        * ``n_shots > 0``: max_tokens=256, leaving room for the model to
          follow the patterns demonstrated in the few-shot examples.

        Args:
            n_shots: Number of few-shot examples. Defaults to 0 (zero-shot).

        Returns:
            InferenceSettings: Settings with temperature=0.0, top_p=1.0, the
            given n_shots, and max_tokens chosen as described above.
        """
        # A single answer token is only appropriate for zero-shot runs.
        max_tokens = 256 if n_shots > 0 else 1
        return cls(
            temperature=0.0,
            top_p=1.0,
            max_tokens=max_tokens,
            n_shots=n_shots,
        )
[docs]
class FactlySettings(BaseSettings):
    """
    Aggregated settings for the Factly CLI, including model and inference configuration.

    Attributes:
        model (ModelSettings): Model API and authentication settings. Defaults
            to a freshly constructed ``ModelSettings()``.
        inference (InferenceSettings): Inference-time decoding parameters.
            Defaults to ``InferenceSettings.for_mmlu()``.
    """

    model: ModelSettings = Field(default_factory=lambda: ModelSettings())
    inference: InferenceSettings = Field(
        default_factory=lambda: InferenceSettings.for_mmlu()
    )

    @classmethod
    def create(cls, **kwargs) -> "FactlySettings":
        """
        Create FactlySettings with optional overrides.

        This factory method handles nested configuration with dictionaries.

        Args:
            **kwargs: Configuration overrides including nested dictionaries
                for model and inference settings.

        Returns:
            FactlySettings: A settings instance.

        Example:
            >>> # Create with API key and custom temperature
            >>> settings = FactlySettings.create(
            ...     model={"api_key": "sk-abc123", "model": "gpt-4o"},
            ...     inference={"temperature": 0.1}
            ... )
            >>>
            >>> # Alternatively, update settings after creation:
            >>> settings = FactlySettings()
            >>> settings.model.api_key = "sk-abc123"
            >>> settings.inference.temperature = 0.1
        """
        return cls(**kwargs)

    @classmethod
    def from_cli(
        cls,
        model: str | None = None,
        api_key: str | None = None,
        api_base: str | None = None,
        temperature: float | None = None,
        top_p: float | None = None,
        max_tokens: int | None = None,
        n_shots: int | None = None,
    ) -> "FactlySettings":
        """
        Create settings by combining CLI arguments with environment variables.

        CLI arguments take precedence over environment variables and defaults.
        Only non-None CLI values will override settings from the environment.

        Note that the inference settings are built with
        ``InferenceSettings.create``, so values not supplied here fall back to
        the ``InferenceSettings`` field defaults rather than to the
        ``InferenceSettings.for_mmlu()`` preset used as this class's default.

        A warning is emitted when the resulting settings combine
        ``n_shots > 0`` with ``max_tokens == 1``.

        Args:
            model: Model name (e.g., "gpt-4o")
            api_key: API key for the model provider
            api_base: Base URL for the API
            temperature: Sampling temperature
            top_p: Nucleus sampling parameter
            max_tokens: Maximum tokens to generate
            n_shots: Number of examples for few-shot learning

        Returns:
            FactlySettings: Combined settings with proper priority
        """
        # Forward only the values the user actually supplied; anything left as
        # None is omitted so the settings classes resolve it themselves.
        inference_kwargs = {
            name: value
            for name, value in {
                "temperature": temperature,
                "top_p": top_p,
                "max_tokens": max_tokens,
                "n_shots": n_shots,
            }.items()
            if value is not None
        }
        _inference = InferenceSettings.create(**inference_kwargs)

        model_kwargs = {
            name: value
            for name, value in {
                "api_key": api_key,
                "api_base": api_base,
                "model": model,
            }.items()
            if value is not None
        }
        _model = ModelSettings.create(**model_kwargs)

        # Assemble the aggregate from the two already-resolved sub-settings.
        settings = cls(
            model=_model,
            inference=_inference,
        )

        if settings.inference.n_shots > 0 and settings.inference.max_tokens == 1:
            import warnings

            # stacklevel=2 attributes the warning to the caller of from_cli
            # rather than to this method.
            warnings.warn(
                "Using n_shots > 0 with max_tokens=1 may produce inconsistent results. "
                "Consider increasing max_tokens to allow for reasoning patterns "
                "demonstrated in few-shot examples.",
                stacklevel=2,
            )
        return settings